/*-
       * SPDX-License-Identifier: BSD-3-Clause
       *
       * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
       * Copyright (c) 2010-2011 Juniper Networks, Inc.
       * All rights reserved.
       *
       * Portions of this software were developed by Robert N. M. Watson under
       * contract to Juniper Networks, Inc.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       * 3. Neither the name of the project nor the names of its contributors
       *    may be used to endorse or promote products derived from this software
       *    without specific prior written permission.
       *
       * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
       * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
       * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
       * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
       * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
       * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
       * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
       * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
       * SUCH DAMAGE.
       *
       *        $KAME: in6_pcb.c,v 1.31 2001/05/21 05:45:10 jinmei Exp $
       */
      
      /*-
       * Copyright (c) 1982, 1986, 1991, 1993
       *        The Regents of the University of California.  All rights reserved.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       * 3. Neither the name of the University nor the names of its contributors
       *    may be used to endorse or promote products derived from this software
       *    without specific prior written permission.
       *
       * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
       * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
       * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
       * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
       * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
       * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
       * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
       * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
       * SUCH DAMAGE.
       *
       *        @(#)in_pcb.c        8.2 (Berkeley) 1/4/94
       */
      
      #include <sys/cdefs.h>
      __FBSDID("$FreeBSD$");
      
      #include "opt_inet.h"
      #include "opt_inet6.h"
      #include "opt_ipsec.h"
      #include "opt_pcbgroup.h"
      #include "opt_rss.h"
      
      #include <sys/param.h>
      #include <sys/systm.h>
      #include <sys/malloc.h>
      #include <sys/mbuf.h>
      #include <sys/domain.h>
      #include <sys/protosw.h>
      #include <sys/socket.h>
      #include <sys/socketvar.h>
      #include <sys/sockio.h>
      #include <sys/errno.h>
      #include <sys/time.h>
      #include <sys/priv.h>
      #include <sys/proc.h>
      #include <sys/jail.h>
      
      #include <vm/uma.h>
      
      #include <net/if.h>
      #include <net/if_var.h>
      #include <net/if_llatbl.h>
      #include <net/if_types.h>
      #include <net/route.h>
      #include <net/route/nhop.h>
      
      #include <netinet/in.h>
      #include <netinet/in_var.h>
      #include <netinet/in_systm.h>
      #include <netinet/tcp_var.h>
      #include <netinet/ip6.h>
      #include <netinet/ip_var.h>
      
      #include <netinet6/ip6_var.h>
      #include <netinet6/nd6.h>
      #include <netinet/in_pcb.h>
      #include <netinet6/in6_pcb.h>
      #include <netinet6/in6_fib.h>
      #include <netinet6/scope6_var.h>
      
/*
 * Bind an IPv6 PCB to a local address and/or port.
 *
 * If 'nam' is NULL only a jail check is applied to the address already
 * in the PCB and an ephemeral port is assigned; otherwise 'nam' supplies
 * the requested sockaddr_in6.  'cred' is used for jail and privilege
 * checks.  Returns 0 on success or an errno (EINVAL, EAFNOSUPPORT,
 * EACCES, EADDRINUSE, EADDRNOTAVAIL, EAGAIN).
 *
 * The caller must hold the inpcb write lock and the pcbinfo hash
 * write lock.
 */
int
in6_pcbbind(struct inpcb *inp, struct sockaddr *nam,
    struct ucred *cred)
{
	struct socket *so = inp->inp_socket;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)NULL;
	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
	u_short	lport = 0;
	int error, lookupflags = 0;
	int reuseport = (so->so_options & SO_REUSEPORT);

	/*
	 * XXX: Maybe we could let SO_REUSEPORT_LB set SO_REUSEPORT bit here
	 * so that we don't have to add to the (already messy) code below.
	 */
	int reuseport_lb = (so->so_options & SO_REUSEPORT_LB);

	INP_WLOCK_ASSERT(inp);
	INP_HASH_WLOCK_ASSERT(pcbinfo);

	if (CK_STAILQ_EMPTY(&V_in6_ifaddrhead))	/* XXX broken! */
		return (EADDRNOTAVAIL);
	/* Reject a second bind: the PCB must still be fully unbound. */
	if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
		return (EINVAL);
	/* Without any reuse option, conflict lookups must match wildcards. */
	if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT|SO_REUSEPORT_LB)) == 0)
		lookupflags = INPLOOKUP_WILDCARD;
	if (nam == NULL) {
		/* No address given: just validate/rewrite for the jail. */
		if ((error = prison_local_ip6(cred, &inp->in6p_laddr,
		    ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0)
			return (error);
	} else {
		sin6 = (struct sockaddr_in6 *)nam;
		if (nam->sa_len != sizeof(*sin6))
			return (EINVAL);
		/*
		 * family check.
		 */
		if (nam->sa_family != AF_INET6)
			return (EAFNOSUPPORT);

		/* Fold any scope zone id into the address itself. */
		if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0)
			return(error);

		if ((error = prison_local_ip6(cred, &sin6->sin6_addr,
		    ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0)
			return (error);

		lport = sin6->sin6_port;
		if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
			/*
			 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
			 * allow complete duplication of binding if
			 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
			 * and a multicast address is bound on both
			 * new and duplicated sockets.
			 */
			if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) != 0)
				reuseport = SO_REUSEADDR|SO_REUSEPORT;
			/*
			 * XXX: How to deal with SO_REUSEPORT_LB here?
			 * Treat same as SO_REUSEPORT for now.
			 */
			if ((so->so_options &
			    (SO_REUSEADDR|SO_REUSEPORT_LB)) != 0)
				reuseport_lb = SO_REUSEADDR|SO_REUSEPORT_LB;
		} else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
			struct epoch_tracker et;
			struct ifaddr *ifa;

			sin6->sin6_port = 0;		/* yech... */
			NET_EPOCH_ENTER(et);
			/*
			 * A unicast address must be local unless the socket
			 * opted into INP_BINDANY.
			 */
			if ((ifa = ifa_ifwithaddr((struct sockaddr *)sin6)) ==
			    NULL &&
			    (inp->inp_flags & INP_BINDANY) == 0) {
				NET_EPOCH_EXIT(et);
				return (EADDRNOTAVAIL);
			}

			/*
			 * XXX: bind to an anycast address might accidentally
			 * cause sending a packet with anycast source address.
			 * We should allow to bind to a deprecated address, since
			 * the application dares to use it.
			 */
			if (ifa != NULL &&
			    ((struct in6_ifaddr *)ifa)->ia6_flags &
			    (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY|IN6_IFF_DETACHED)) {
				NET_EPOCH_EXIT(et);
				return (EADDRNOTAVAIL);
			}
			NET_EPOCH_EXIT(et);
		}
		if (lport) {
			struct inpcb *t;
			struct tcptw *tw;

			/* GROSS */
			/* Reserved ports require privilege. */
			if (ntohs(lport) <= V_ipport_reservedhigh &&
			    ntohs(lport) >= V_ipport_reservedlow &&
			    priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT))
				return (EACCES);
			/*
			 * Without PRIV_NETINET_REUSEPORT, refuse to bind over
			 * another user's socket on the same port (the big
			 * condition below spells out the allowed overlaps).
			 */
			if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) &&
			    priv_check_cred(inp->inp_cred, PRIV_NETINET_REUSEPORT) != 0) {
				t = in6_pcblookup_local(pcbinfo,
				    &sin6->sin6_addr, lport,
				    INPLOOKUP_WILDCARD, cred);
				if (t &&
				    ((inp->inp_flags2 & INP_BINDMULTI) == 0) &&
				    ((t->inp_flags & INP_TIMEWAIT) == 0) &&
				    (so->so_type != SOCK_STREAM ||
				     IN6_IS_ADDR_UNSPECIFIED(&t->in6p_faddr)) &&
				    (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
				     !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) ||
				     (t->inp_flags2 & INP_REUSEPORT) ||
				     (t->inp_flags2 & INP_REUSEPORT_LB) == 0) &&
				    (inp->inp_cred->cr_uid !=
				     t->inp_cred->cr_uid))
					return (EADDRINUSE);

				/*
				 * If the socket is a BINDMULTI socket, then
				 * the credentials need to match and the
				 * original socket also has to have been bound
				 * with BINDMULTI.
				 */
				if (t && (! in_pcbbind_check_bindmulti(inp, t)))
					return (EADDRINUSE);

#ifdef INET
				/*
				 * For a dual-stack wildcard bind, also check
				 * the IPv4 side for a conflicting socket.
				 */
				if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 &&
				    IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
					struct sockaddr_in sin;

					in6_sin6_2_sin(&sin, sin6);
					t = in_pcblookup_local(pcbinfo,
					    sin.sin_addr, lport,
					    INPLOOKUP_WILDCARD, cred);
					if (t &&
					    ((inp->inp_flags2 & INP_BINDMULTI) == 0) &&
					    ((t->inp_flags &
					      INP_TIMEWAIT) == 0) &&
					    (so->so_type != SOCK_STREAM ||
					     ntohl(t->inp_faddr.s_addr) ==
					      INADDR_ANY) &&
					    (inp->inp_cred->cr_uid !=
					     t->inp_cred->cr_uid))
						return (EADDRINUSE);

					if (t && (! in_pcbbind_check_bindmulti(inp, t)))
						return (EADDRINUSE);
				}
#endif
			}
			/*
			 * Even with sufficient privilege the reuse options of
			 * both sockets must be compatible.
			 */
			t = in6_pcblookup_local(pcbinfo, &sin6->sin6_addr,
			    lport, lookupflags, cred);
			if (t && (t->inp_flags & INP_TIMEWAIT)) {
				/*
				 * XXXRW: If an incpb has had its timewait
				 * state recycled, we treat the address as
				 * being in use (for now).  This is better
				 * than a panic, but not desirable.
				 */
				tw = intotw(t);
				if (tw == NULL ||
				    ((reuseport & tw->tw_so_options) == 0 &&
					 (reuseport_lb & tw->tw_so_options) == 0))
					return (EADDRINUSE);
			} else if (t && (reuseport & inp_so_options(t)) == 0 &&
					   (reuseport_lb & inp_so_options(t)) == 0) {
				return (EADDRINUSE);
			}
#ifdef INET
			/* Same reuse-option compatibility check, IPv4 side. */
			if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 &&
			    IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
				struct sockaddr_in sin;

				in6_sin6_2_sin(&sin, sin6);
				t = in_pcblookup_local(pcbinfo, sin.sin_addr,
				   lport, lookupflags, cred);
				if (t && t->inp_flags & INP_TIMEWAIT) {
					tw = intotw(t);
					if (tw == NULL)
						return (EADDRINUSE);
					if ((reuseport & tw->tw_so_options) == 0
					    && (reuseport_lb & tw->tw_so_options) == 0
					    && (ntohl(t->inp_laddr.s_addr) !=
						INADDR_ANY || ((inp->inp_vflag &
							INP_IPV6PROTO) ==
						    (t->inp_vflag & INP_IPV6PROTO))))
						return (EADDRINUSE);
				} else if (t &&
				    (reuseport & inp_so_options(t)) == 0 &&
				    (reuseport_lb & inp_so_options(t)) == 0 &&
				    (ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
					(t->inp_vflag & INP_IPV6PROTO) != 0)) {
					return (EADDRINUSE);
				}
			}
#endif
		}
		/* Conflict checks passed; commit the requested address. */
		inp->in6p_laddr = sin6->sin6_addr;
	}
	if (lport == 0) {
		/* No port requested: pick an ephemeral one. */
		if ((error = in6_pcbsetport(&inp->in6p_laddr, inp, cred)) != 0) {
			/* Undo an address bind that may have occurred. */
			inp->in6p_laddr = in6addr_any;
			return (error);
		}
	} else {
		inp->inp_lport = lport;
		if (in_pcbinshash(inp) != 0) {
			/* Hash insertion failed: roll back the bind. */
			inp->in6p_laddr = in6addr_any;
			inp->inp_lport = 0;
			return (EAGAIN);
		}
	}
	return (0);
}
      
      /*
       *   Transform old in6_pcbconnect() into an inner subroutine for new
       *   in6_pcbconnect(): Do some validity-checking on the remote
       *   address (in mbuf 'nam') and then determine local host address
       *   (i.e., which interface) to use to access that remote host.
       *
       *   This preserves definition of in6_pcbconnect(), while supporting a
       *   slightly different version for T/TCP.  (This is more than
       *   a bit of a kludge, but cleaning up the internal interfaces would
       *   have forced minor changes in every protocol).
       */
static int
in6_pcbladdr(struct inpcb *inp, struct sockaddr *nam,
    struct in6_addr *plocal_addr6)
{
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
	int error = 0;
	int scope_ambiguous = 0;
	struct in6_addr in6a;

	INP_WLOCK_ASSERT(inp);
	INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);	/* XXXRW: why? */

	/* Validate the remote sockaddr: length, family, non-zero port. */
	if (nam->sa_len != sizeof (*sin6))
		return (EINVAL);
	if (sin6->sin6_family != AF_INET6)
		return (EAFNOSUPPORT);
	if (sin6->sin6_port == 0)
		return (EADDRNOTAVAIL);

	/*
	 * A zero scope id with no default zone configured means source
	 * selection may still disambiguate the scope; remember that.
	 */
	if (sin6->sin6_scope_id == 0 && !V_ip6_use_defzone)
		scope_ambiguous = 1;
	if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0)
		return(error);

	if (!CK_STAILQ_EMPTY(&V_in6_ifaddrhead)) {
		/*
		 * If the destination address is UNSPECIFIED addr,
		 * use the loopback addr, e.g ::1.
		 */
		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
			sin6->sin6_addr = in6addr_loopback;
	}
	/* The jail must be allowed to reach this remote address. */
	if ((error = prison_remote_ip6(inp->inp_cred, &sin6->sin6_addr)) != 0)
		return (error);

	/* Pick the source address/interface for this destination. */
	error = in6_selectsrc_socket(sin6, inp->in6p_outputopts,
	    inp, inp->inp_cred, scope_ambiguous, &in6a, NULL);
	if (error)
		return (error);

	/*
	 * Do not update this earlier, in case we return with an error.
	 *
	 * XXX: this in6_selectsrc_socket result might replace the bound local
	 * address with the address specified by setsockopt(IPV6_PKTINFO).
	 * Is it the intended behavior?
	 */
	*plocal_addr6 = in6a;

	/*
	 * Don't do pcblookup call here; return interface in
	 * plocal_addr6
	 * and exit to caller, that will do the lookup.
	 */

	return (0);
}
      
      /*
       * Outer subroutine:
       * Connect from a socket to a specified address.
       * Both address and port must be specified in argument sin.
       * If don't have a local address for this socket yet,
       * then pick one.
       */
/*
 * 'm' is an optional mbuf passed through to the hash update routines;
 * 'rehash' selects in_pcbrehash_mbuf() (already-hashed PCB) versus
 * in_pcbinshash_mbuf() (first insertion).  Returns 0 or an errno.
 */
int
in6_pcbconnect_mbuf(struct inpcb *inp, struct sockaddr *nam,
    struct ucred *cred, struct mbuf *m, bool rehash)
{
	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
	struct sockaddr_in6 laddr6;
	int error;

	bzero(&laddr6, sizeof(laddr6));
	laddr6.sin6_family = AF_INET6;

	INP_WLOCK_ASSERT(inp);
	INP_HASH_WLOCK_ASSERT(pcbinfo);

	/*
	 * Call inner routine, to assign local interface address.
	 * in6_pcbladdr() may automatically fill in sin6_scope_id.
	 */
	if ((error = in6_pcbladdr(inp, nam, &laddr6.sin6_addr)) != 0)
		return (error);

	/* Refuse a 4-tuple that collides with an existing connection. */
	if (in6_pcblookup_hash_locked(pcbinfo, &sin6->sin6_addr,
			       sin6->sin6_port,
			      IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)
			      ? &laddr6.sin6_addr : &inp->in6p_laddr,
			      inp->inp_lport, 0, NULL) != NULL) {
		return (EADDRINUSE);
	}
	if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
		if (inp->inp_lport == 0) {
			/*
			 * rehash was required to be true in the past for
			 * this case; retain that convention.  However,
			 * we now call in_pcb_lport_dest rather than
			 * in6_pcbbind; the former does not insert into
			 * the hash table, the latter does.  Change rehash
			 * to false to do the in_pcbinshash below.
			 */
			KASSERT(rehash == true,
			    ("Rehashing required for unbound inps"));
			rehash = false;
			error = in_pcb_lport_dest(inp,
			    (struct sockaddr *) &laddr6, &inp->inp_lport,
			    (struct sockaddr *) sin6, sin6->sin6_port, cred, 0);
			if (error)
				return (error);
		}
		inp->in6p_laddr = laddr6.sin6_addr;
	}
	/* Commit the foreign endpoint. */
	inp->in6p_faddr = sin6->sin6_addr;
	inp->inp_fport = sin6->sin6_port;
	/* update flowinfo - draft-itojun-ipv6-flowlabel-api-00 */
	inp->inp_flow &= ~IPV6_FLOWLABEL_MASK;
	if (inp->inp_flags & IN6P_AUTOFLOWLABEL)
		inp->inp_flow |=
		    (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);

	/* Re-insert (or first-insert) the PCB under its new 4-tuple. */
	if (rehash) {
		in_pcbrehash_mbuf(inp, m);
	} else {
		in_pcbinshash_mbuf(inp, m);
	}

	return (0);
}
      
      int
      in6_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred)
 1841 {
      
              return (in6_pcbconnect_mbuf(inp, nam, cred, NULL, true));
      }
      
      void
      in6_pcbdisconnect(struct inpcb *inp)
    8 {
      
              INP_WLOCK_ASSERT(inp);
              INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
      
              bzero((caddr_t)&inp->in6p_faddr, sizeof(inp->in6p_faddr));
              inp->inp_fport = 0;
              /* clear flowinfo - draft-itojun-ipv6-flowlabel-api-00 */
              inp->inp_flow &= ~IPV6_FLOWLABEL_MASK;
              in_pcbrehash(inp);
      }
      
      struct sockaddr *
      in6_sockaddr(in_port_t port, struct in6_addr *addr_p)
      {
              struct sockaddr_in6 *sin6;
      
              sin6 = malloc(sizeof *sin6, M_SONAME, M_WAITOK);
              bzero(sin6, sizeof *sin6);
              sin6->sin6_family = AF_INET6;
              sin6->sin6_len = sizeof(*sin6);
              sin6->sin6_port = port;
              sin6->sin6_addr = *addr_p;
              (void)sa6_recoverscope(sin6); /* XXX: should catch errors */
      
              return (struct sockaddr *)sin6;
      }
      
      struct sockaddr *
      in6_v4mapsin6_sockaddr(in_port_t port, struct in_addr *addr_p)
      {
              struct sockaddr_in sin;
              struct sockaddr_in6 *sin6_p;
      
              bzero(&sin, sizeof sin);
              sin.sin_family = AF_INET;
              sin.sin_len = sizeof(sin);
              sin.sin_port = port;
              sin.sin_addr = *addr_p;
      
              sin6_p = malloc(sizeof *sin6_p, M_SONAME,
                      M_WAITOK);
              in6_sin_2_v4mapsin6(&sin, sin6_p);
      
              return (struct sockaddr *)sin6_p;
      }
      
      int
      in6_getsockaddr(struct socket *so, struct sockaddr **nam)
    5 {
              struct inpcb *inp;
              struct in6_addr addr;
              in_port_t port;
      
              inp = sotoinpcb(so);
              KASSERT(inp != NULL, ("in6_getsockaddr: inp == NULL"));
      
    5         INP_RLOCK(inp);
              port = inp->inp_lport;
              addr = inp->in6p_laddr;
              INP_RUNLOCK(inp);
      
              *nam = in6_sockaddr(port, &addr);
              return 0;
      }
      
      int
      in6_getpeeraddr(struct socket *so, struct sockaddr **nam)
    4 {
              struct inpcb *inp;
              struct in6_addr addr;
              in_port_t port;
      
              inp = sotoinpcb(so);
              KASSERT(inp != NULL, ("in6_getpeeraddr: inp == NULL"));
      
    4         INP_RLOCK(inp);
              port = inp->inp_fport;
              addr = inp->in6p_faddr;
              INP_RUNLOCK(inp);
      
              *nam = in6_sockaddr(port, &addr);
              return 0;
      }
      
      int
      in6_mapped_sockaddr(struct socket *so, struct sockaddr **nam)
    5 {
              struct        inpcb *inp;
              int        error;
      
              inp = sotoinpcb(so);
              KASSERT(inp != NULL, ("in6_mapped_sockaddr: inp == NULL"));
      
      #ifdef INET
              if ((inp->inp_vflag & (INP_IPV4 | INP_IPV6)) == INP_IPV4) {
                      error = in_getsockaddr(so, nam);
                      if (error == 0)
    1                         in6_sin_2_v4mapsin6_in_sock(nam);
              } else
      #endif
              {
                      /* scope issues will be handled in in6_getsockaddr(). */
    4                 error = in6_getsockaddr(so, nam);
              }
      
              return error;
      }
      
      int
      in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam)
    3 {
              struct        inpcb *inp;
              int        error;
      
              inp = sotoinpcb(so);
              KASSERT(inp != NULL, ("in6_mapped_peeraddr: inp == NULL"));
      
      #ifdef INET
              if ((inp->inp_vflag & (INP_IPV4 | INP_IPV6)) == INP_IPV4) {
                      error = in_getpeeraddr(so, nam);
                      if (error == 0)
    1                         in6_sin_2_v4mapsin6_in_sock(nam);
              } else
      #endif
              /* scope issues will be handled in in6_getpeeraddr(). */
    2         error = in6_getpeeraddr(so, nam);
      
              return error;
      }
      
      /*
       * Pass some notification to all connections of a protocol
       * associated with address dst.  The local address and/or port numbers
       * may be specified to limit the search.  The "usual action" will be
       * taken, depending on the ctlinput cmd.  The caller must filter any
       * cmds that are uninteresting (e.g., no error in the map).
       * Call the protocol specific routine (if any) to report
       * any errors for each matching socket.
       */
      void
      in6_pcbnotify(struct inpcbinfo *pcbinfo, struct sockaddr *dst,
          u_int fport_arg, const struct sockaddr *src, u_int lport_arg,
          int cmd, void *cmdarg,
          struct inpcb *(*notify)(struct inpcb *, int))
   13 {
              struct inpcb *inp, *inp_temp;
              struct sockaddr_in6 sa6_src, *sa6_dst;
              u_short        fport = fport_arg, lport = lport_arg;
              u_int32_t flowinfo;
              int errno;
      
              if ((unsigned)cmd >= PRC_NCMDS || dst->sa_family != AF_INET6)
                      return;
      
              sa6_dst = (struct sockaddr_in6 *)dst;
   13         if (IN6_IS_ADDR_UNSPECIFIED(&sa6_dst->sin6_addr))
                      return;
      
              /*
               * note that src can be NULL when we get notify by local fragmentation.
               */
              sa6_src = (src == NULL) ? sa6_any : *(const struct sockaddr_in6 *)src;
              flowinfo = sa6_src.sin6_flowinfo;
      
              /*
               * Redirects go to all references to the destination,
               * and use in6_rtchange to invalidate the route cache.
               * Dead host indications: also use in6_rtchange to invalidate
               * the cache, and deliver the error to all the sockets.
               * Otherwise, if we have knowledge of the local port and address,
               * deliver only to that socket.
               */
   12         if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) {
                      fport = 0;
                      lport = 0;
                      bzero((caddr_t)&sa6_src.sin6_addr, sizeof(sa6_src.sin6_addr));
      
                      if (cmd != PRC_HOSTDEAD)
                              notify = in6_rtchange;
              }
              errno = inet6ctlerrmap[cmd];
              INP_INFO_WLOCK(pcbinfo);
   12         CK_LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, inp_temp) {
                      INP_WLOCK(inp);
   12                 if ((inp->inp_vflag & INP_IPV6) == 0) {
                              INP_WUNLOCK(inp);
                              continue;
                      }
      
                      /*
                       * If the error designates a new path MTU for a destination
                       * and the application (associated with this socket) wanted to
                       * know the value, notify.
                       * XXX: should we avoid to notify the value to TCP sockets?
                       */
    2                 if (cmd == PRC_MSGSIZE && cmdarg != NULL)
                              ip6_notify_pmtu(inp, (struct sockaddr_in6 *)dst,
    3                                         *(u_int32_t *)cmdarg);
      
                      /*
                       * Detect if we should notify the error. If no source and
                       * destination ports are specifed, but non-zero flowinfo and
                       * local address match, notify the error. This is the case
                       * when the error is delivered with an encrypted buffer
                       * by ESP. Otherwise, just compare addresses and ports
                       * as usual.
                       */
    2                 if (lport == 0 && fport == 0 && flowinfo &&
                          inp->inp_socket != NULL &&
    3                     flowinfo == (inp->inp_flow & IPV6_FLOWLABEL_MASK) &&
                          IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &sa6_src.sin6_addr))
                              goto do_notify;
                      else if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr,
    5                                              &sa6_dst->sin6_addr) ||
                               inp->inp_socket == 0 ||
                               (lport && inp->inp_lport != lport) ||
                               (!IN6_IS_ADDR_UNSPECIFIED(&sa6_src.sin6_addr) &&
                                !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr,
                                                    &sa6_src.sin6_addr)) ||
                               (fport && inp->inp_fport != fport)) {
                              INP_WUNLOCK(inp);
                              continue;
                      }
      
                do_notify:
                      if (notify) {
                              if ((*notify)(inp, errno))
                                      INP_WUNLOCK(inp);
                      } else
                              INP_WUNLOCK(inp);
              }
              INP_INFO_WUNLOCK(pcbinfo);
      }
      
      /*
       * Lookup a PCB based on the local address and port.  Caller must hold the
       * hash lock.  No inpcb locks or references are acquired.
       */
struct inpcb *
in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr,
    u_short lport, int lookupflags, struct ucred *cred)
{
	struct inpcb *inp;
	/*
	 * A PCB can be "wild" in at most two dimensions here (foreign
	 * address and local address), so the worst achievable score is 2;
	 * matchwild starts at 3 so the first candidate always becomes the
	 * initial best match.
	 */
	int matchwild = 3, wildcard;

	KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
	    ("%s: invalid lookup flags %d", __func__, lookupflags));

	INP_HASH_LOCK_ASSERT(pcbinfo);

	if ((lookupflags & INPLOOKUP_WILDCARD) == 0) {
		struct inpcbhead *head;
		/*
		 * Look for an unconnected (wildcard foreign addr) PCB that
		 * matches the local address and port we're looking for.
		 */
		head = &pcbinfo->ipi_hashbase[INP_PCBHASH(
		    INP6_PCBHASHKEY(&in6addr_any), lport, 0,
		    pcbinfo->ipi_hashmask)];
		CK_LIST_FOREACH(inp, head, inp_hash) {
			/* XXX inp locking */
			if ((inp->inp_vflag & INP_IPV6) == 0)
				continue;
			if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) &&
			    IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) &&
			    inp->inp_lport == lport) {
				/*
				 * Found.  Only hand it back if the caller's
				 * credentials (when supplied) share a prison
				 * with the PCB's owner.
				 */
				if (cred == NULL ||
				    prison_equal_ip6(cred->cr_prison,
					inp->inp_cred->cr_prison))
					return (inp);
			}
		}
		/*
		 * Not found.
		 */
		return (NULL);
	} else {
		struct inpcbporthead *porthash;
		struct inpcbport *phd;
		struct inpcb *match = NULL;
		/*
		 * Best fit PCB lookup.
		 *
		 * First see if this local port is in use by looking on the
		 * port hash list.
		 */
		porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
		    pcbinfo->ipi_porthashmask)];
		CK_LIST_FOREACH(phd, porthash, phd_hash) {
			if (phd->phd_port == lport)
				break;
		}
		if (phd != NULL) {
			/*
			 * Port is in use by one or more PCBs. Look for best
			 * fit.  Each mismatch in a wildcardable dimension
			 * bumps the candidate's score; lowest score wins.
			 */
			CK_LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
				wildcard = 0;
				/* Candidates from other prisons never match. */
				if (cred != NULL &&
				    !prison_equal_ip6(cred->cr_prison,
					inp->inp_cred->cr_prison))
					continue;
				/* XXX inp locking */
				if ((inp->inp_vflag & INP_IPV6) == 0)
					continue;
				/* Connected PCB: foreign addr is non-wild. */
				if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))
					wildcard++;
				if (!IN6_IS_ADDR_UNSPECIFIED(
					&inp->in6p_laddr)) {
					if (IN6_IS_ADDR_UNSPECIFIED(laddr))
						wildcard++;
					else if (!IN6_ARE_ADDR_EQUAL(
					    &inp->in6p_laddr, laddr))
						continue;
				} else {
					if (!IN6_IS_ADDR_UNSPECIFIED(laddr))
						wildcard++;
				}
				if (wildcard < matchwild) {
					match = inp;
					matchwild = wildcard;
					/* Exact match: cannot do better. */
					if (matchwild == 0)
						break;
				}
			}
		}
		return (match);
	}
}
      
/*
 * Scrub references to interface 'ifp' from the IPv6 multicast options of
 * every PCB in 'pcbinfo': clear it as the selected outgoing multicast
 * interface and leave any groups that were joined through it.
 */
void
in6_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
{
	struct inpcb *inp;
	struct in6_multi *inm;
	struct in6_mfilter *imf;
	struct ip6_moptions *im6o;

	INP_INFO_WLOCK(pcbinfo);
	CK_LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
		INP_WLOCK(inp);
		/* Skip PCBs that have already been torn down. */
		if (__predict_false(inp->inp_flags2 & INP_FREED)) {
			INP_WUNLOCK(inp);
			continue;
		}
		im6o = inp->in6p_moptions;
		if ((inp->inp_vflag & INP_IPV6) && im6o != NULL) {
			/*
			 * Unselect the outgoing ifp for multicast if it
			 * is being detached.
			 */
			if (im6o->im6o_multicast_ifp == ifp)
				im6o->im6o_multicast_ifp = NULL;
			/*
			 * Drop multicast group membership if we joined
			 * through the interface being detached.
			 * Removing an entry invalidates the iterator, so
			 * rescan the filter list from the top each time.
			 */
restart:
			IP6_MFILTER_FOREACH(imf, &im6o->im6o_head) {
				if ((inm = imf->im6f_in6m) == NULL)
					continue;
				if (inm->in6m_ifp != ifp)
					continue;
				ip6_mfilter_remove(&im6o->im6o_head, imf);
				IN6_MULTI_LOCK_ASSERT();
				in6_leavegroup_locked(inm, NULL);
				ip6_mfilter_free(imf);
				goto restart;
			}
		}
		INP_WUNLOCK(inp);
	}
	INP_INFO_WUNLOCK(pcbinfo);
}
      
/*
 * Check for alternatives when higher level complains
 * about service problems.  For now, invalidate cached
 * routing information.  If the route was created dynamically
 * (by a redirect), time to try a default gateway again.
 */
void
in6_losing(struct inpcb *inp)
{

	/* Discard the route cached on this PCB. */
	RO_INVALIDATE_CACHE(&inp->inp_route6);
}
      
      /*
       * After a routing change, flush old routing
       * and allocate a (hopefully) better one.
       */
      struct inpcb *
      in6_rtchange(struct inpcb *inp, int errno __unused)
      {
      
              RO_INVALIDATE_CACHE(&inp->inp_route6);
              return inp;
      }
      
/*
 * Look up a PCB in the load-balancing group hash for the given local
 * address/port, steering a flow (faddr/fport) deterministically to one
 * member of the matching group.  Returns NULL if no group matches.
 */
static struct inpcb *
in6_pcblookup_lbgroup(const struct inpcbinfo *pcbinfo,
    const struct in6_addr *laddr, uint16_t lport, const struct in6_addr *faddr,
    uint16_t fport, int lookupflags)
{
	struct inpcb *local_wild;
	const struct inpcblbgrouphead *hdr;
	struct inpcblbgroup *grp;
	uint32_t idx;

	INP_HASH_LOCK_ASSERT(pcbinfo);

	hdr = &pcbinfo->ipi_lbgrouphashbase[
	    INP_PCBPORTHASH(lport, pcbinfo->ipi_lbgrouphashmask)];

	/*
	 * Order of socket selection:
	 * 1. non-wild.
	 * 2. wild (if lookupflags contains INPLOOKUP_WILDCARD).
	 *
	 * NOTE:
	 * - Load balanced group does not contain jailed sockets.
	 * - Load balanced does not contain IPv4 mapped INET6 wild sockets.
	 */
	local_wild = NULL;
	CK_LIST_FOREACH(grp, hdr, il_list) {
#ifdef INET
		/* When INET is compiled in, skip IPv4-only groups. */
		if (!(grp->il_vflag & INP_IPV6))
			continue;
#endif
		if (grp->il_lport != lport)
			continue;

		/*
		 * Hash the flow modulo the group size so a given 4-tuple
		 * always selects the same member socket.
		 */
		idx = INP_PCBLBGROUP_PKTHASH(INP6_PCBHASHKEY(faddr), lport,
		    fport) % grp->il_inpcnt;
		if (IN6_ARE_ADDR_EQUAL(&grp->il6_laddr, laddr))
			return (grp->il_inp[idx]);
		if (IN6_IS_ADDR_UNSPECIFIED(&grp->il6_laddr) &&
		    (lookupflags & INPLOOKUP_WILDCARD) != 0)
			local_wild = grp->il_inp[idx];
	}
	return (local_wild);
}
      
      #ifdef PCBGROUP
      /*
       * Lookup PCB in hash list, using pcbgroup tables.
       */
      static struct inpcb *
      in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup,
          struct in6_addr *faddr, u_int fport_arg, struct in6_addr *laddr,
          u_int lport_arg, int lookupflags, struct ifnet *ifp)
      {
              struct inpcbhead *head;
              struct inpcb *inp, *tmpinp;
              u_short fport = fport_arg, lport = lport_arg;
              bool locked;
      
              /*
               * First look for an exact match.
               */
              tmpinp = NULL;
              INP_GROUP_LOCK(pcbgroup);
              head = &pcbgroup->ipg_hashbase[INP_PCBHASH(
                  INP6_PCBHASHKEY(faddr), lport, fport, pcbgroup->ipg_hashmask)];
              CK_LIST_FOREACH(inp, head, inp_pcbgrouphash) {
                      /* XXX inp locking */
                      if ((inp->inp_vflag & INP_IPV6) == 0)
                              continue;
                      if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) &&
                          IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) &&
                          inp->inp_fport == fport &&
                          inp->inp_lport == lport) {
                              /*
                               * XXX We should be able to directly return
                               * the inp here, without any checks.
                               * Well unless both bound with SO_REUSEPORT?
                               */
                              if (prison_flag(inp->inp_cred, PR_IP6))
                                      goto found;
                              if (tmpinp == NULL)
                                      tmpinp = inp;
                      }
              }
              if (tmpinp != NULL) {
                      inp = tmpinp;
                      goto found;
              }
      
              /*
               * Then look for a wildcard match in the pcbgroup.
               */
              if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
                      struct inpcb *local_wild = NULL, *local_exact = NULL;
                      struct inpcb *jail_wild = NULL;
                      int injail;
      
                      /*
                       * Order of socket selection - we always prefer jails.
                       *      1. jailed, non-wild.
                       *      2. jailed, wild.
                       *      3. non-jailed, non-wild.
                       *      4. non-jailed, wild.
                       */
                      head = &pcbgroup->ipg_hashbase[
                          INP_PCBHASH(INADDR_ANY, lport, 0, pcbgroup->ipg_hashmask)];
                      CK_LIST_FOREACH(inp, head, inp_pcbgrouphash) {
                              /* XXX inp locking */
                              if ((inp->inp_vflag & INP_IPV6) == 0)
                                      continue;
      
                              if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) ||
                                  inp->inp_lport != lport) {
                                      continue;
                              }
      
                              injail = prison_flag(inp->inp_cred, PR_IP6);
                              if (injail) {
                                      if (prison_check_ip6(inp->inp_cred,
                                          laddr) != 0)
                                              continue;
                              } else {
                                      if (local_exact != NULL)
                                              continue;
                              }
      
                              if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) {
                                      if (injail)
                                              goto found;
                                      else
                                              local_exact = inp;
                              } else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
                                      if (injail)
                                              jail_wild = inp;
                                      else
                                              local_wild = inp;
                              }
                      } /* LIST_FOREACH */
      
                      inp = jail_wild;
                      if (inp == NULL)
                              inp = jail_wild;
                      if (inp == NULL)
                              inp = local_exact;
                      if (inp == NULL)
                              inp = local_wild;
                      if (inp != NULL)
                              goto found;
              }
      
              /*
               * Then look for a wildcard match, if requested.
               */
              if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
                      struct inpcb *local_wild = NULL, *local_exact = NULL;
                      struct inpcb *jail_wild = NULL;
                      int injail;
      
                      /*
                       * Order of socket selection - we always prefer jails.
                       *      1. jailed, non-wild.
                       *      2. jailed, wild.
                       *      3. non-jailed, non-wild.
                       *      4. non-jailed, wild.
                       */
                      head = &pcbinfo->ipi_wildbase[INP_PCBHASH(
                          INP6_PCBHASHKEY(&in6addr_any), lport, 0,
                          pcbinfo->ipi_wildmask)];
                      CK_LIST_FOREACH(inp, head, inp_pcbgroup_wild) {
                              /* XXX inp locking */
                              if ((inp->inp_vflag & INP_IPV6) == 0)
                                      continue;
      
                              if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) ||
                                  inp->inp_lport != lport) {
                                      continue;
                              }
      
                              injail = prison_flag(inp->inp_cred, PR_IP6);
                              if (injail) {
                                      if (prison_check_ip6(inp->inp_cred,
                                          laddr) != 0)
                                              continue;
                              } else {
                                      if (local_exact != NULL)
                                              continue;
                              }
      
                              if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) {
                                      if (injail)
                                              goto found;
                                      else
                                              local_exact = inp;
                              } else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
                                      if (injail)
                                              jail_wild = inp;
                                      else
                                              local_wild = inp;
                              }
                      } /* LIST_FOREACH */
      
                      inp = jail_wild;
                      if (inp == NULL)
                              inp = jail_wild;
                      if (inp == NULL)
                              inp = local_exact;
                      if (inp == NULL)
                              inp = local_wild;
                      if (inp != NULL)
                              goto found;
              } /* if ((lookupflags & INPLOOKUP_WILDCARD) != 0) */
              INP_GROUP_UNLOCK(pcbgroup);
              return (NULL);
      
      found:
              if (lookupflags & INPLOOKUP_WLOCKPCB)
                      locked = INP_TRY_WLOCK(inp);
              else if (lookupflags & INPLOOKUP_RLOCKPCB)
                      locked = INP_TRY_RLOCK(inp);
              else
                      panic("%s: locking buf", __func__);
              if (!locked)
                      in_pcbref(inp);
              INP_GROUP_UNLOCK(pcbgroup);
              if (!locked) {
                      if (lookupflags & INPLOOKUP_WLOCKPCB) {
                              INP_WLOCK(inp);
                              if (in_pcbrele_wlocked(inp))
                                      return (NULL);
                      } else {
                              INP_RLOCK(inp);
                              if (in_pcbrele_rlocked(inp))
                                      return (NULL);
                      }
              }
      #ifdef INVARIANTS
              if (lookupflags & INPLOOKUP_WLOCKPCB)
                      INP_WLOCK_ASSERT(inp);
              else
                      INP_RLOCK_ASSERT(inp);
      #endif
              return (inp);
      }
      #endif /* PCBGROUP */
      
      /*
       * Lookup PCB in hash list.  Used in in_pcb.c as well as here.
       */
struct inpcb *
in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
    u_int fport_arg, struct in6_addr *laddr, u_int lport_arg,
    int lookupflags, struct ifnet *ifp)
{
	struct inpcbhead *head;
	struct inpcb *inp, *tmpinp;
	u_short fport = fport_arg, lport = lport_arg;

	KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
	    ("%s: invalid lookup flags %d", __func__, lookupflags));

	INP_HASH_LOCK_ASSERT(pcbinfo);

	/*
	 * First look for an exact 4-tuple match.  A jailed owner is
	 * returned immediately; a non-jailed one is remembered in tmpinp
	 * as the fallback.
	 */
	tmpinp = NULL;
	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(
	    INP6_PCBHASHKEY(faddr), lport, fport, pcbinfo->ipi_hashmask)];
	CK_LIST_FOREACH(inp, head, inp_hash) {
		/* XXX inp locking */
		if ((inp->inp_vflag & INP_IPV6) == 0)
			continue;
		if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) &&
		    IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) &&
		    inp->inp_fport == fport &&
		    inp->inp_lport == lport) {
			/*
			 * XXX We should be able to directly return
			 * the inp here, without any checks.
			 * Well unless both bound with SO_REUSEPORT?
			 */
			if (prison_flag(inp->inp_cred, PR_IP6))
				return (inp);
			if (tmpinp == NULL)
				tmpinp = inp;
		}
	}
	if (tmpinp != NULL)
		return (tmpinp);

	/*
	 * Then look in lb group (for wildcard match).
	 */
	if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
		inp = in6_pcblookup_lbgroup(pcbinfo, laddr, lport, faddr,
		    fport, lookupflags);
		if (inp != NULL)
			return (inp);
	}

	/*
	 * Then look for a wildcard match, if requested.
	 */
	if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
		struct inpcb *local_wild = NULL, *local_exact = NULL;
		struct inpcb *jail_wild = NULL;
		int injail;

		/*
		 * Order of socket selection - we always prefer jails.
		 *      1. jailed, non-wild.
		 *      2. jailed, wild.
		 *      3. non-jailed, non-wild.
		 *      4. non-jailed, wild.
		 */
		head = &pcbinfo->ipi_hashbase[INP_PCBHASH(
		    INP6_PCBHASHKEY(&in6addr_any), lport, 0,
		    pcbinfo->ipi_hashmask)];
		CK_LIST_FOREACH(inp, head, inp_hash) {
			/* XXX inp locking */
			if ((inp->inp_vflag & INP_IPV6) == 0)
				continue;

			/* Only unconnected PCBs bound to lport qualify. */
			if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) ||
			    inp->inp_lport != lport) {
				continue;
			}

			injail = prison_flag(inp->inp_cred, PR_IP6);
			if (injail) {
				/* laddr must be visible inside the jail. */
				if (prison_check_ip6(inp->inp_cred,
				    laddr) != 0)
					continue;
			} else {
				/* A non-jailed exact match beats any later
				 * non-jailed candidate; stop scoring them. */
				if (local_exact != NULL)
					continue;
			}

			if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) {
				if (injail)
					return (inp);
				else
					local_exact = inp;
			} else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
				if (injail)
					jail_wild = inp;
				else
					local_wild = inp;
			}
		} /* LIST_FOREACH */

		if (jail_wild != NULL)
			return (jail_wild);
		if (local_exact != NULL)
			return (local_exact);
		if (local_wild != NULL)
			return (local_wild);
	} /* if ((lookupflags & INPLOOKUP_WILDCARD) != 0) */

	/*
	 * Not found.
	 */
	return (NULL);
}
      
      /*
       * Lookup PCB in hash list, using pcbinfo tables.  This variation locks the
       * hash list lock, and will return the inpcb locked (i.e., requires
       * INPLOOKUP_LOCKPCB).
       */
static struct inpcb *
in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
    u_int fport, struct in6_addr *laddr, u_int lport, int lookupflags,
    struct ifnet *ifp)
{
	struct inpcb *inp;

	/*
	 * Do the lookup without the PCB-lock flags; the PCB is locked
	 * below, after the lookup returns.
	 */
	inp = in6_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport,
	    (lookupflags & ~(INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)), ifp);
	if (inp != NULL) {
		if (lookupflags & INPLOOKUP_WLOCKPCB) {
			INP_WLOCK(inp);
			/*
			 * Recheck INP_FREED after acquiring the lock in
			 * case the PCB was torn down before we got it.
			 */
			if (__predict_false(inp->inp_flags2 & INP_FREED)) {
				INP_WUNLOCK(inp);
				inp = NULL;
			}
		} else if (lookupflags & INPLOOKUP_RLOCKPCB) {
			INP_RLOCK(inp);
			if (__predict_false(inp->inp_flags2 & INP_FREED)) {
				INP_RUNLOCK(inp);
				inp = NULL;
			}
		} else
			panic("%s: locking bug", __func__);
#ifdef INVARIANTS
		if (inp != NULL) {
			if (lookupflags & INPLOOKUP_WLOCKPCB)
				INP_WLOCK_ASSERT(inp);
			else
				INP_RLOCK_ASSERT(inp);
		}
#endif
	}
	return (inp);
}
      
      /*
       * Public inpcb lookup routines, accepting a 4-tuple, and optionally, an mbuf
       * from which a pre-calculated hash value may be extracted.
       *
       * Possibly more of this logic should be in in6_pcbgroup.c.
       */
struct inpcb *
in6_pcblookup(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, u_int fport,
    struct in6_addr *laddr, u_int lport, int lookupflags, struct ifnet *ifp)
{
#if defined(PCBGROUP) && !defined(RSS)
	struct inpcbgroup *pcbgroup;
#endif

	KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
	    ("%s: invalid lookup flags %d", __func__, lookupflags));
	/* Callers must request either a read or a write lock on the PCB. */
	KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
	    ("%s: LOCKPCB not set", __func__));

	/*
	 * When not using RSS, use connection groups in preference to the
	 * reservation table when looking up 4-tuples.  When using RSS, just
	 * use the reservation table, due to the cost of the Toeplitz hash
	 * in software.
	 *
	 * XXXRW: This policy belongs in the pcbgroup code, as in principle
	 * we could be doing RSS with a non-Toeplitz hash that is affordable
	 * in software.
	 */
#if defined(PCBGROUP) && !defined(RSS)
	if (in_pcbgroup_enabled(pcbinfo)) {
		pcbgroup = in6_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr,
		    fport);
		return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, fport,
		    laddr, lport, lookupflags, ifp));
	}
#endif
	/* Fall back to the plain hash-table lookup. */
	return (in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
	    lookupflags, ifp));
}
      
/*
 * As in6_pcblookup(), but may use a hardware-computed flow hash carried in
 * the mbuf packet header to pick the connection group.
 */
struct inpcb *
in6_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
    u_int fport, struct in6_addr *laddr, u_int lport, int lookupflags,
    struct ifnet *ifp, struct mbuf *m)
{
#ifdef PCBGROUP
	struct inpcbgroup *pcbgroup;
#endif

	KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
	    ("%s: invalid lookup flags %d", __func__, lookupflags));
	/* Callers must request either a read or a write lock on the PCB. */
	KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
	    ("%s: LOCKPCB not set", __func__));

#ifdef PCBGROUP
	/*
	 * If we can use a hardware-generated hash to look up the connection
	 * group, use that connection group to find the inpcb.  Otherwise
	 * fall back on a software hash -- or the reservation table if we're
	 * using RSS.
	 *
	 * XXXRW: As above, that policy belongs in the pcbgroup code.
	 */
	if (in_pcbgroup_enabled(pcbinfo) &&
	    M_HASHTYPE_TEST(m, M_HASHTYPE_NONE) == 0) {
		pcbgroup = in6_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m),
		    m->m_pkthdr.flowid);
		if (pcbgroup != NULL)
			return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr,
			    fport, laddr, lport, lookupflags, ifp));
#ifndef RSS
		/* No group for the hardware hash: fall back to the tuple. */
		pcbgroup = in6_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr,
		    fport);
		return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, fport,
		    laddr, lport, lookupflags, ifp));
#endif
	}
#endif
	return (in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
	    lookupflags, ifp));
}
      
      void
      init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m, int srcordst)
   45 {
              struct ip6_hdr *ip;
      
              ip = mtod(m, struct ip6_hdr *);
              bzero(sin6, sizeof(*sin6));
              sin6->sin6_len = sizeof(*sin6);
              sin6->sin6_family = AF_INET6;
              sin6->sin6_addr = srcordst ? ip->ip6_dst : ip->ip6_src;
      
              (void)sa6_recoverscope(sin6); /* XXX: should catch errors... */
      
              return;
      }
      /*-
       * Copyright (c) 2007-2009 Robert N. M. Watson
       * All rights reserved.
       *
       * This software was developed by Robert Watson for the TrustedBSD Project.
       *
       * This software was developed at the University of Cambridge Computer
       * Laboratory with support from a grant from Google, Inc.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       *
       * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
       * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
       * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
       * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
       * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
       * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
       * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
       * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
       * SUCH DAMAGE.
       */
      
      #include <sys/cdefs.h>
      __FBSDID("$FreeBSD$");
      
      #include "opt_mac.h"
      
      #include <sys/param.h>
      #include <sys/kernel.h>
      #include <sys/lock.h>
      #include <sys/malloc.h>
      #include <sys/mutex.h>
      #include <sys/sbuf.h>
      #include <sys/systm.h>
      #include <sys/mount.h>
      #include <sys/file.h>
      #include <sys/namei.h>
      #include <sys/protosw.h>
      #include <sys/socket.h>
      #include <sys/socketvar.h>
      #include <sys/sysctl.h>
      
      #include <net/if.h>
      #include <net/if_var.h>
      
      #include <netinet/in.h>
      #include <netinet/ip6.h>
      #include <netinet6/ip6_var.h>
      
      #include <security/mac/mac_framework.h>
      #include <security/mac/mac_internal.h>
      #include <security/mac/mac_policy.h>
      
/*
 * Allocate and policy-initialize a MAC label for an IPv6 reassembly
 * queue.  Returns NULL if the label zone allocation fails or if a
 * policy's init hook rejects the label.
 *
 * NOTE(review): 'error' is assigned inside the MAC_POLICY_CHECK{,
 * _NOSLEEP}() macros; it is not read uninitialized despite appearances.
 */
static struct label *
mac_ip6q_label_alloc(int flag)
{
	struct label *label;
	int error;

	label = mac_labelzone_alloc(flag);
	if (label == NULL)
		return (NULL);

	/* Use the sleepable variant only when the caller may wait. */
	if (flag & M_WAITOK)
		MAC_POLICY_CHECK(ip6q_init_label, label, flag);
	else
		MAC_POLICY_CHECK_NOSLEEP(ip6q_init_label, label, flag);
	if (error) {
		/* A policy refused the label; undo any partial inits. */
		MAC_POLICY_PERFORM_NOSLEEP(ip6q_destroy_label, label);
		mac_labelzone_free(label);
		return (NULL);
	}
	return (label);
}
      
      int
      mac_ip6q_init(struct ip6q *q6, int flag)
    1 {
      
    1         if (mac_labeled & MPC_OBJECT_IP6Q) {
                      q6->ip6q_label = mac_ip6q_label_alloc(flag);
                      if (q6->ip6q_label == NULL)
                              return (ENOMEM);
              } else
                      q6->ip6q_label = NULL;
              return (0);
      }
      
/*
 * Tear down and free an ip6q MAC label: let every policy destroy its
 * per-label state, then return the label to the zone.
 */
static void
mac_ip6q_label_free(struct label *label)
{

	MAC_POLICY_PERFORM_NOSLEEP(ip6q_destroy_label, label);
	mac_labelzone_free(label);
}
      
      void
      mac_ip6q_destroy(struct ip6q *q6)
    1 {
      
    1         if (q6->ip6q_label != NULL) {
                      mac_ip6q_label_free(q6->ip6q_label);
                      q6->ip6q_label = NULL;
              }
      }
      
      void
      mac_ip6q_reassemble(struct ip6q *q6, struct mbuf *m)
      {
              struct label *label;
      
              if (mac_policy_count == 0)
                      return;
      
              label = mac_mbuf_to_label(m);
      
              MAC_POLICY_PERFORM_NOSLEEP(ip6q_reassemble, q6, q6->ip6q_label, m,
                  label);
      }
      
      void
      mac_ip6q_create(struct mbuf *m, struct ip6q *q6)
    1 {
              struct label *label;
      
    1         if (mac_policy_count == 0)
                      return;
      
              label = mac_mbuf_to_label(m);
      
              MAC_POLICY_PERFORM_NOSLEEP(ip6q_create, m, label, q6,
                  q6->ip6q_label);
      }
      
      int
      mac_ip6q_match(struct mbuf *m, struct ip6q *q6)
    2 {
              struct label *label;
              int result;
      
    2         if (mac_policy_count == 0)
                      return (1);
      
              label = mac_mbuf_to_label(m);
      
              result = 1;
              MAC_POLICY_BOOLEAN_NOSLEEP(ip6q_match, &&, m, label, q6,
                  q6->ip6q_label);
      
              return (result);
      }
      
      void
      mac_ip6q_update(struct mbuf *m, struct ip6q *q6)
      {
              struct label *label;
      
              if (mac_policy_count == 0)
                      return;
      
              label = mac_mbuf_to_label(m);
      
              MAC_POLICY_PERFORM_NOSLEEP(ip6q_update, m, label, q6,
                  q6->ip6q_label);
      }
      
      void
      mac_netinet6_nd6_send(struct ifnet *ifp, struct mbuf *m)
 2020 {
              struct label *mlabel;
      
 2020         if (mac_policy_count == 0)
                      return;
      
              mlabel = mac_mbuf_to_label(m);
      
              MAC_POLICY_PERFORM_NOSLEEP(netinet6_nd6_send, ifp, ifp->if_label, m,
                  mlabel);
      }
      /*-
       * Copyright (c) 2013-2015 Gleb Smirnoff <glebius@FreeBSD.org>
       * Copyright (c) 1998, David Greenman. All rights reserved.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       * 3. Neither the name of the University nor the names of its contributors
       *    may be used to endorse or promote products derived from this software
       *    without specific prior written permission.
       *
       * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
       * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
       * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
       * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
       * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
       * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
       * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
       * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
       * SUCH DAMAGE.
       */
      
      #include <sys/cdefs.h>
      __FBSDID("$FreeBSD$");
      
      #include "opt_kern_tls.h"
      
      #include <sys/param.h>
      #include <sys/systm.h>
      #include <sys/capsicum.h>
      #include <sys/kernel.h>
      #include <sys/lock.h>
      #include <sys/ktls.h>
      #include <sys/mutex.h>
      #include <sys/malloc.h>
      #include <sys/mman.h>
      #include <sys/mount.h>
      #include <sys/mbuf.h>
      #include <sys/proc.h>
      #include <sys/protosw.h>
      #include <sys/rwlock.h>
      #include <sys/sf_buf.h>
      #include <sys/socket.h>
      #include <sys/socketvar.h>
      #include <sys/syscallsubr.h>
      #include <sys/sysctl.h>
      #include <sys/sysproto.h>
      #include <sys/vnode.h>
      
      #include <net/vnet.h>
      #include <netinet/in.h>
      #include <netinet/tcp.h>
      
      #include <security/audit/audit.h>
      #include <security/mac/mac_framework.h>
      
      #include <vm/vm.h>
      #include <vm/vm_object.h>
      #include <vm/vm_pager.h>
      
static MALLOC_DEFINE(M_SENDFILE, "sendfile", "sendfile dynamic memory");

/* Private m_ext flags, carried in the vendor flag space. */
#define	EXT_FLAG_SYNC		EXT_FLAG_VENDOR1	/* signal sendfile_sync on free */
#define	EXT_FLAG_NOCACHE	EXT_FLAG_VENDOR2	/* release pages with VPR_TRYFREE */
#define	EXT_FLAG_CACHE_LAST	EXT_FLAG_VENDOR3	/* but keep the last page cached */
      
      /*
       * Structure describing a single sendfile(2) I/O, which may consist of
       * several underlying pager I/Os.
       *
       * The syscall context allocates the structure and initializes 'nios'
       * to 1.  As sendfile_swapin() runs through pages and starts asynchronous
       * paging operations, it increments 'nios'.
       *
       * Every I/O completion calls sendfile_iodone(), which decrements the 'nios',
       * and the syscall also calls sendfile_iodone() after allocating all mbufs,
       * linking them and sending to socket.  Whoever reaches zero 'nios' is
 * responsible to call pru_ready on the socket, to notify it of readiness
       * of the data.
       */
struct sf_io {
	volatile u_int	nios;		/* outstanding pager I/Os + 1 for the syscall */
	u_int		error;		/* sticky first error from sendfile_iodone() */
	int		npages;		/* number of entries in pa[] */
	struct socket	*so;		/* destination socket */
	struct mbuf	*m;		/* not-ready mbuf chain; NULL if nothing sent */
	vm_object_t	obj;		/* backing VM object of the file */
	vm_pindex_t	pindex0;	/* object index corresponding to pa[0] */
#ifdef KERN_TLS
	struct ktls_session *tls;	/* TLS session, if the socket uses KTLS */
#endif
	vm_page_t	pa[];		/* wired pages backing the request */
};
      
/*
 * Structure used to track requests with SF_SYNC flag.
 */
struct sendfile_sync {
	struct mtx	mtx;		/* protects 'count' and 'waiting' */
	struct cv	cv;		/* signaled when 'count' drops to zero */
	unsigned	count;		/* in-flight mbufs still referencing us */
	bool		waiting;	/* true while sendfile() may sleep on 'cv' */
};
      
/*
 * Free an SF_SYNC tracker.  The caller must hold the last reference;
 * it may still hold sfs->mtx (see sendfile_sync_signal()), which
 * mtx_destroy() tolerates.
 */
static void
sendfile_sync_destroy(struct sendfile_sync *sfs)
{
	KASSERT(sfs->count == 0, ("sendfile sync %p still busy", sfs));

	cv_destroy(&sfs->cv);
	mtx_destroy(&sfs->mtx);
	free(sfs, M_SENDFILE);
}
      
/*
 * Drop one in-flight reference on an SF_SYNC tracker.  When the last
 * mbuf completes, either wake the sleeping sendfile() syscall, or — if
 * the sleeper already gave up ('waiting' is false) — destroy the
 * tracker ourselves, mutex and all.
 */
static void
sendfile_sync_signal(struct sendfile_sync *sfs)
{
	mtx_lock(&sfs->mtx);
	KASSERT(sfs->count > 0, ("sendfile sync %p not busy", sfs));
	if (--sfs->count == 0) {
		if (!sfs->waiting) {
			/* The sendfile() waiter was interrupted by a signal. */
			sendfile_sync_destroy(sfs);
			/* sfs (including its mutex) is gone; do not unlock. */
			return;
		} else {
			cv_signal(&sfs->cv);
		}
	}
	mtx_unlock(&sfs->mtx);
}
      
/* Per-CPU sendfile statistics, one counter per uint64_t of struct sfstat. */
counter_u64_t sfstat[sizeof(struct sfstat) / sizeof(uint64_t)];

/* Allocate the per-CPU counters during early boot (SI_SUB_MBUF). */
static void
sfstat_init(const void *unused)
{

	COUNTER_ARRAY_ALLOC(sfstat, sizeof(struct sfstat) / sizeof(uint64_t),
	    M_WAITOK);
}
SYSINIT(sfstat, SI_SUB_MBUF, SI_ORDER_FIRST, sfstat_init, NULL);
      
      static int
      sfstat_sysctl(SYSCTL_HANDLER_ARGS)
      {
              struct sfstat s;
      
              COUNTER_ARRAY_COPY(sfstat, &s, sizeof(s) / sizeof(uint64_t));
              if (req->newptr)
                      COUNTER_ARRAY_ZERO(sfstat, sizeof(s) / sizeof(uint64_t));
              return (SYSCTL_OUT(req, &s, sizeof(s)));
      }
      SYSCTL_PROC(_kern_ipc, OID_AUTO, sfstat,
          CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_NEEDGIANT, NULL, 0,
          sfstat_sysctl, "I",
          "sendfile statistics");
      
      static void
      sendfile_free_mext(struct mbuf *m)
   10 {
              struct sf_buf *sf;
              vm_page_t pg;
              int flags;
      
              KASSERT(m->m_flags & M_EXT && m->m_ext.ext_type == EXT_SFBUF,
                  ("%s: m %p !M_EXT or !EXT_SFBUF", __func__, m));
      
              sf = m->m_ext.ext_arg1;
              pg = sf_buf_page(sf);
              flags = (m->m_ext.ext_flags & EXT_FLAG_NOCACHE) != 0 ? VPR_TRYFREE : 0;
      
              sf_buf_free(sf);
              vm_page_release(pg, flags);
      
    4         if (m->m_ext.ext_flags & EXT_FLAG_SYNC) {
    4                 struct sendfile_sync *sfs = m->m_ext.ext_arg2;
                      sendfile_sync_signal(sfs);
              }
      }
      
      static void
      sendfile_free_mext_pg(struct mbuf *m)
      {
              vm_page_t pg;
              int flags, i;
              bool cache_last;
      
              M_ASSERTEXTPG(m);
      
              cache_last = m->m_ext.ext_flags & EXT_FLAG_CACHE_LAST;
              flags = (m->m_ext.ext_flags & EXT_FLAG_NOCACHE) != 0 ? VPR_TRYFREE : 0;
      
              for (i = 0; i < m->m_epg_npgs; i++) {
                      if (cache_last && i == m->m_epg_npgs - 1)
                              flags = 0;
                      pg = PHYS_TO_VM_PAGE(m->m_epg_pa[i]);
                      vm_page_release(pg, flags);
              }
      
              if (m->m_ext.ext_flags & EXT_FLAG_SYNC) {
                      struct sendfile_sync *sfs = m->m_ext.ext_arg1;
                      sendfile_sync_signal(sfs);
              }
      }
      
      /*
       * Helper function to calculate how much data to put into page i of n.
       * Only first and last pages are special.
       */
      static inline off_t
      xfsize(int i, int n, off_t off, off_t len)
      {
      
              if (i == 0)
                      return (omin(PAGE_SIZE - (off & PAGE_MASK), len));
      
              if (i == n - 1 && ((off + len) & PAGE_MASK) > 0)
                      return ((off + len) & PAGE_MASK);
      
              return (PAGE_SIZE);
      }
      
      /*
       * Helper function to get offset within object for i page.
       */
      static inline vm_ooffset_t
      vmoff(int i, off_t off)
      {
      
              if (i == 0)
                      return ((vm_ooffset_t)off);
      
              return (trunc_page(off + i * PAGE_SIZE));
      }
      
      /*
       * Helper function used when allocation of a page or sf_buf failed.
       * Pretend as if we don't have enough space, subtract xfsize() of
       * all pages that failed.
       */
      static inline void
      fixspace(int old, int new, off_t off, int *space)
      {
      
              KASSERT(old > new, ("%s: old %d new %d", __func__, old, new));
      
              /* Subtract last one. */
              *space -= xfsize(old - 1, old, off, *space);
              old--;
      
              if (new == old)
                      /* There was only one page. */
                      return;
      
              /* Subtract first one. */
              if (new == 0) {
                      *space -= xfsize(0, old, off, *space);
                      new++;
              }
      
              /* Rest of pages are full sized. */
              *space -= (old - new) * PAGE_SIZE;
      
              KASSERT(*space >= 0, ("%s: space went backwards", __func__));
      }
      
/*
 * Wait for all in-flight ios to complete, we must not unwire pages
 * under them.
 */
static void
sendfile_iowait(struct sf_io *sfio, const char *wmesg)
{
	/*
	 * 'nios' counts outstanding pager I/Os plus the one reference
	 * held by the syscall itself; poll until only that one remains.
	 */
	while (atomic_load_int(&sfio->nios) != 1)
		pause(wmesg, 1);
}
      
      /*
       * I/O completion callback.
       */
      static void
      sendfile_iodone(void *arg, vm_page_t *pa, int count, int error)
   23 {
              struct sf_io *sfio = arg;
              struct socket *so;
              int i;
      
   23         if (error != 0)
                      sfio->error = error;
      
              /*
               * Restore the valid page pointers.  They are already
               * unbusied, but still wired.
               *
               * XXXKIB since pages are only wired, and we do not
               * own the object lock, other users might have
               * invalidated them in meantime.  Similarly, after we
               * unbusied the swapped-in pages, they can become
               * invalid under us.
               */
   23         MPASS(count == 0 || pa[0] != bogus_page);
              for (i = 0; i < count; i++) {
                      if (pa[i] == bogus_page) {
                              sfio->pa[(pa[0]->pindex - sfio->pindex0) + i] =
                                  pa[i] = vm_page_relookup(sfio->obj,
                                  pa[0]->pindex + i);
                              KASSERT(pa[i] != NULL,
                                  ("%s: page %p[%d] disappeared",
                                  __func__, pa, i));
                      } else {
                              vm_page_xunbusy_unchecked(pa[i]);
                      }
              }
      
              if (!refcount_release(&sfio->nios))
                      return;
      
      #ifdef INVARIANTS
   19         for (i = 1; i < sfio->npages; i++) {
                      if (sfio->pa[i] == NULL)
                              break;
                      KASSERT(vm_page_wired(sfio->pa[i]),
                          ("sfio %p page %d %p not wired", sfio, i, sfio->pa[i]));
                      if (i == 0)
                              continue;
                      KASSERT(sfio->pa[0]->object == sfio->pa[i]->object,
                          ("sfio %p page %d %p wrong owner %p %p", sfio, i,
                          sfio->pa[i], sfio->pa[0]->object, sfio->pa[i]->object));
                      KASSERT(sfio->pa[0]->pindex + i == sfio->pa[i]->pindex,
                          ("sfio %p page %d %p wrong index %jx %jx", sfio, i,
                          sfio->pa[i], (uintmax_t)sfio->pa[0]->pindex,
                          (uintmax_t)sfio->pa[i]->pindex));
              }
      #endif
      
              vm_object_pip_wakeup(sfio->obj);
      
              if (sfio->m == NULL) {
                      /*
                       * Either I/O operation failed, or we failed to allocate
                       * buffers, or we bailed out on first busy page, or we
                       * succeeded filling the request without any I/Os. Anyway,
                       * pru_send hadn't been executed - nothing had been sent
                       * to the socket yet.
                       */
   19                 MPASS((curthread->td_pflags & TDP_KTHREAD) == 0);
                      free(sfio, M_SENDFILE);
                      return;
              }
      
      #if defined(KERN_TLS) && defined(INVARIANTS)
              if ((sfio->m->m_flags & M_EXTPG) != 0)
                      KASSERT(sfio->tls == sfio->m->m_epg_tls,
                          ("TLS session mismatch"));
              else
                      KASSERT(sfio->tls == NULL,
                          ("non-ext_pgs mbuf with TLS session"));
      #endif
              so = sfio->so;
              CURVNET_SET(so->so_vnet);
              if (__predict_false(sfio->error)) {
                      /*
                       * I/O operation failed.  The state of data in the socket
                       * is now inconsistent, and all what we can do is to tear
                       * it down. Protocol abort method would tear down protocol
                       * state, free all ready mbufs and detach not ready ones.
                       * We will free the mbufs corresponding to this I/O manually.
                       *
                       * The socket would be marked with EIO and made available
                       * for read, so that application receives EIO on next
                       * syscall and eventually closes the socket.
                       */
                      so->so_proto->pr_usrreqs->pru_abort(so);
                      so->so_error = EIO;
      
                      mb_free_notready(sfio->m, sfio->npages);
      #ifdef KERN_TLS
              } else if (sfio->tls != NULL && sfio->tls->mode == TCP_TLS_MODE_SW) {
                      /*
                       * I/O operation is complete, but we still need to
                       * encrypt.  We cannot do this in the interrupt thread
                       * of the disk controller, so forward the mbufs to a
                       * different thread.
                       *
                       * Donate the socket reference from sfio to rather
                       * than explicitly invoking soref().
                       */
                      ktls_enqueue(sfio->m, so, sfio->npages);
                      goto out_with_ref;
      #endif
              } else
                      (void)(so->so_proto->pr_usrreqs->pru_ready)(so, sfio->m,
                          sfio->npages);
      
              SOCK_LOCK(so);
              sorele(so);
      #ifdef KERN_TLS
      out_with_ref:
      #endif
              CURVNET_RESTORE();
              free(sfio, M_SENDFILE);
      }
      
/*
 * Iterate through pages vector and request paging for non-valid pages.
 *
 * On success '*nios' holds the number of asynchronous pager I/Os that
 * were started (zero if every page was already valid or zero-filled).
 * On failure returns EIO with all grabbed pages unbusied and unwired.
 */
static int
sendfile_swapin(vm_object_t obj, struct sf_io *sfio, int *nios, off_t off,
    off_t len, int npages, int rhpages, int flags)
{
	vm_page_t *pa;
	int a, count, count1, grabbed, i, j, rv;

	pa = sfio->pa;
	*nios = 0;
	/* SF_NODISKIO: the grab below must not sleep (VM_ALLOC_NOWAIT). */
	flags = (flags & SF_NODISKIO) ? VM_ALLOC_NOWAIT : 0;
	sfio->pindex0 = OFF_TO_IDX(off);

	/*
	 * First grab all the pages and wire them.  Note that we grab
	 * only required pages.  Readahead pages are dealt with later.
	 */
	grabbed = vm_page_grab_pages_unlocked(obj, OFF_TO_IDX(off),
	    VM_ALLOC_NORMAL | VM_ALLOC_WIRED | flags, pa, npages);
	if (grabbed < npages) {
		/* Shrink the request to what we actually got. */
		for (int i = grabbed; i < npages; i++)
			pa[i] = NULL;
		npages = grabbed;
		rhpages = 0;
	}

	for (i = 0; i < npages;) {
		/* Skip valid pages. */
		if (vm_page_is_valid(pa[i], vmoff(i, off) & PAGE_MASK,
		    xfsize(i, npages, off, len))) {
			vm_page_xunbusy(pa[i]);
			SFSTAT_INC(sf_pages_valid);
			i++;
			continue;
		}

		/*
		 * Next page is invalid.  Check if it belongs to pager.  It
		 * may not be there, which is a regular situation for shmem
		 * pager.  For vnode pager this happens only in case of
		 * a sparse file.
		 *
		 * Important feature of vm_pager_has_page() is the hint
		 * stored in 'a', about how many pages we can pagein after
		 * this page in a single I/O.
		 */
		VM_OBJECT_RLOCK(obj);
		if (!vm_pager_has_page(obj, OFF_TO_IDX(vmoff(i, off)), NULL,
		    &a)) {
			/* A hole in the file: hand back a zeroed page. */
			VM_OBJECT_RUNLOCK(obj);
			pmap_zero_page(pa[i]);
			vm_page_valid(pa[i]);
			MPASS(pa[i]->dirty == 0);
			vm_page_xunbusy(pa[i]);
			i++;
			continue;
		}
		VM_OBJECT_RUNLOCK(obj);

		/*
		 * We want to pagein as many pages as possible, limited only
		 * by the 'a' hint and actual request.
		 */
		count = min(a + 1, npages - i);

		/*
		 * We should not pagein into a valid page because
		 * there might be still unfinished write tracked by
		 * e.g. a buffer, thus we substitute any valid pages
		 * with the bogus one.
		 *
		 * We must not leave around xbusy pages which are not
		 * part of the run passed to vm_pager_getpages(),
		 * otherwise pager might deadlock waiting for the busy
		 * status of the page, e.g. if it constitutes the
		 * buffer needed to validate other page.
		 *
		 * First trim the end of the run consisting of the
		 * valid pages, then replace the rest of the valid
		 * with bogus.
		 */
		count1 = count;
		for (j = i + count - 1; j > i; j--) {
			if (vm_page_is_valid(pa[j], vmoff(j, off) & PAGE_MASK,
			    xfsize(j, npages, off, len))) {
				vm_page_xunbusy(pa[j]);
				SFSTAT_INC(sf_pages_valid);
				count--;
			} else {
				break;
			}
		}

		/*
		 * The last page in the run pa[i + count - 1] is
		 * guaranteed to be invalid by the trim above, so it
		 * is not replaced with bogus, thus -1 in the loop end
		 * condition.
		 */
		MPASS(pa[i + count - 1]->valid != VM_PAGE_BITS_ALL);
		for (j = i + 1; j < i + count - 1; j++) {
			if (vm_page_is_valid(pa[j], vmoff(j, off) & PAGE_MASK,
			    xfsize(j, npages, off, len))) {
				vm_page_xunbusy(pa[j]);
				SFSTAT_INC(sf_pages_valid);
				SFSTAT_INC(sf_pages_bogus);
				pa[j] = bogus_page;
			}
		}

		/* One reference per started I/O; dropped in sendfile_iodone(). */
		refcount_acquire(&sfio->nios);
		rv = vm_pager_get_pages_async(obj, pa + i, count, NULL,
		    i + count == npages ? &rhpages : NULL,
		    &sendfile_iodone, sfio);
		if (__predict_false(rv != VM_PAGER_OK)) {
			sendfile_iowait(sfio, "sferrio");

			/*
			 * Do remaining pages recovery before returning EIO.
			 * Pages from 0 to npages are wired.
			 * Pages from (i + count1) to npages are busied.
			 */
			for (j = 0; j < npages; j++) {
				if (j >= i + count1)
					vm_page_xunbusy(pa[j]);
				KASSERT(pa[j] != NULL && pa[j] != bogus_page,
				    ("%s: page %p[%d] I/O recovery failure",
				    __func__, pa, j));
				vm_page_unwire(pa[j], PQ_INACTIVE);
			}
			return (EIO);
		}

		SFSTAT_INC(sf_iocnt);
		SFSTAT_ADD(sf_pages_read, count);
		if (i + count == npages)
			SFSTAT_ADD(sf_rhpages_read, rhpages);

		i += count1;
		(*nios)++;
	}

	if (*nios == 0 && npages != 0)
		SFSTAT_INC(sf_noiocnt);

	return (0);
}
      
/*
 * Resolve the source descriptor of sendfile(2) into a VM object.  On
 * success '*obj_res' is returned referenced (per the comment below, so
 * a forced vnode reclaim cannot destroy it; the caller drops the
 * reference), with '*vp_res' or '*shmfd_res' set according to the
 * descriptor type.  '*obj_size' receives the object size and '*bsize'
 * the filesystem I/O size (vnodes only, otherwise 0).
 */
static int
sendfile_getobj(struct thread *td, struct file *fp, vm_object_t *obj_res,
    struct vnode **vp_res, struct shmfd **shmfd_res, off_t *obj_size,
    int *bsize)
{
	struct vattr va;
	vm_object_t obj;
	struct vnode *vp;
	struct shmfd *shmfd;
	int error;

	vp = *vp_res = NULL;
	obj = NULL;
	shmfd = *shmfd_res = NULL;
	*bsize = 0;

	/*
	 * The file descriptor must be a regular file and have a
	 * backing VM object.
	 */
	if (fp->f_type == DTYPE_VNODE) {
		vp = fp->f_vnode;
		vn_lock(vp, LK_SHARED | LK_RETRY);
		if (vp->v_type != VREG) {
			error = EINVAL;
			goto out;
		}
		*bsize = vp->v_mount->mnt_stat.f_iosize;
		error = VOP_GETATTR(vp, &va, td->td_ucred);
		if (error != 0)
			goto out;
		*obj_size = va.va_size;
		obj = vp->v_object;
		if (obj == NULL) {
			error = EINVAL;
			goto out;
		}
	} else if (fp->f_type == DTYPE_SHM) {
		error = 0;
		shmfd = fp->f_data;
		obj = shmfd->shm_object;
		*obj_size = shmfd->shm_size;
	} else {
		error = EINVAL;
		goto out;
	}

	VM_OBJECT_WLOCK(obj);
	if ((obj->flags & OBJ_DEAD) != 0) {
		/* The object is being terminated; refuse the transfer. */
		VM_OBJECT_WUNLOCK(obj);
		error = EBADF;
		goto out;
	}

	/*
	 * Temporarily increase the backing VM object's reference
	 * count so that a forced reclamation of its vnode does not
	 * immediately destroy it.
	 */
	vm_object_reference_locked(obj);
	VM_OBJECT_WUNLOCK(obj);
	*obj_res = obj;
	*vp_res = vp;
	*shmfd_res = shmfd;

out:
	/* The vnode lock is only held across the attribute lookup. */
	if (vp != NULL)
		VOP_UNLOCK(vp);
	return (error);
}
      
      static int
      sendfile_getsock(struct thread *td, int s, struct file **sock_fp,
          struct socket **so)
      {
              int error;
      
              *sock_fp = NULL;
              *so = NULL;
      
              /*
               * The socket must be a stream socket and connected.
               */
              error = getsock_cap(td, s, &cap_send_rights,
                  sock_fp, NULL, NULL);
    2         if (error != 0)
                      return (error);
              *so = (*sock_fp)->f_data;
    1         if ((*so)->so_type != SOCK_STREAM)
                      return (EINVAL);
              /*
               * SCTP one-to-one style sockets currently don't work with
               * sendfile(). So indicate EINVAL for now.
               */
    1         if ((*so)->so_proto->pr_protocol == IPPROTO_SCTP)
                      return (EINVAL);
              if (SOLISTENING(*so))
                      return (ENOTCONN);
              return (0);
      }
      
       /*
        * vn_sendfile(): the core sendfile(2) engine.
        *
        * Transmit up to 'nbytes' bytes (0 means "to EOF") of the vnode- or
        * shm-backed file 'fp', starting at 'offset', onto the stream socket
        * named by descriptor 'sockfd'.  Optional header ('hdr_uio') and
        * trailer ('trl_uio') iovecs are sent before and after the file data.
        * The number of bytes handed to the socket is stored through 'sent'
        * (when non-NULL) even on error, so callers can observe partial
        * progress.  Returns 0 or an errno; ERESTART is mapped to EINTR.
        *
        * Pages are wired and wrapped in external-storage mbufs (sf_bufs or
        * unmapped M_EXTPG pages), so the file data is sent zero-copy; pages
        * with I/O still in flight are queued M_NOTREADY and completed by
        * sendfile_iodone().
        */
       int
       vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
           struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags,
           struct thread *td)
    38 {
               struct file *sock_fp;
               struct vnode *vp;
               struct vm_object *obj;
               vm_page_t pga;
               struct socket *so;
       #ifdef KERN_TLS
               struct ktls_session *tls;
       #endif
               struct mbuf *m, *mh, *mhtail;
               struct sf_buf *sf;
               struct shmfd *shmfd;
               struct sendfile_sync *sfs;
               struct vattr va;
               off_t off, sbytes, rem, obj_size;
               int bsize, error, ext_pgs_idx, hdrlen, max_pgs, softerr;
       #ifdef KERN_TLS
               int tls_enq_cnt;
       #endif
               bool use_ext_pgs;
       
               /*
                * Initialize everything the 'out' cleanup path may look at, so
                * an early 'goto out' is always safe.
                */
               obj = NULL;
               so = NULL;
               m = mh = NULL;
               sfs = NULL;
       #ifdef KERN_TLS
               tls = NULL;
       #endif
               hdrlen = sbytes = 0;
               softerr = 0;
               use_ext_pgs = false;
       
               /* Resolve the source file to its backing VM object. */
               error = sendfile_getobj(td, fp, &obj, &vp, &shmfd, &obj_size, &bsize);
     1         if (error != 0)
                       return (error);
       
               error = sendfile_getsock(td, sockfd, &sock_fp, &so);
               if (error != 0)
                       goto out;
       
       #ifdef MAC
               error = mac_socket_check_send(td->td_ucred, so);
               if (error != 0)
                       goto out;
       #endif
       
               SFSTAT_INC(sf_syscalls);
               SFSTAT_ADD(sf_rhpages_requested, SF_READAHEAD(flags));
       
               /*
                * SF_SYNC: allocate a counter/condvar pair.  Each mbuf carrying
                * file pages takes a reference; before returning we sleep until
                * all of them have been freed (see the sfs block at the end).
                */
    26         if (flags & SF_SYNC) {
                       sfs = malloc(sizeof(*sfs), M_SENDFILE, M_WAITOK | M_ZERO);
                       mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF);
                       cv_init(&sfs->cv, "sendfile");
                       sfs->waiting = true;
               }
       
               /* Bytes of file data still to be sent; nbytes == 0 means to EOF. */
               rem = nbytes ? omin(nbytes, obj_size - offset) : obj_size - offset;
       
               /*
                * Protect against multiple writers to the socket.
                *
                * XXXRW: Historically this has assumed non-interruptibility, so now
                * we implement that, but possibly shouldn't.
                */
               (void)sblock(&so->so_snd, SBL_WAIT | SBL_NOINTR);
       #ifdef KERN_TLS
               /* Pin the session so it cannot be torn down mid-transfer. */
               tls = ktls_hold(so->so_snd.sb_tls_info);
       #endif
       
               /*
                * Loop through the pages of the file, starting with the requested
                * offset. Get a file page (do I/O if necessary), map the file page
                * into an sf_buf, attach an mbuf header to the sf_buf, and queue
                * it on the socket.
                * This is done in two loops.  The inner loop turns as many pages
                * as it can, up to available socket buffer space, without blocking
                * into mbufs to have it bulk delivered into the socket send buffer.
                * The outer loop checks the state and available space of the socket
                * and takes care of the overall progress.
                */
               for (off = offset; rem > 0; ) {
                       struct sf_io *sfio;
                       vm_page_t *pa;
                       struct mbuf *m0, *mtail;
                       int nios, space, npages, rhpages;
       
                       mtail = NULL;
                       /*
                        * Check the socket state for ongoing connection,
                        * no errors and space in socket buffer.
                        * If space is low allow for the remainder of the
                        * file to be processed if it fits the socket buffer.
                        * Otherwise block in waiting for sufficient space
                        * to proceed, or if the socket is nonblocking, return
                        * to userland with EAGAIN while reporting how far
                        * we've come.
                        * We wait until the socket buffer has significant free
                        * space to do bulk sends.  This makes good use of file
                        * system read ahead and allows packet segmentation
                        * offloading hardware to take over lots of work.  If
                        * we were not careful here we would send off only one
                        * sfbuf at a time.
                        */
                       SOCKBUF_LOCK(&so->so_snd);
    11                 if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2)
    26                         so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2;
       retry_space:
                       if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
                               error = EPIPE;
                               SOCKBUF_UNLOCK(&so->so_snd);
                               goto done;
                       } else if (so->so_error) {
                               error = so->so_error;
                               so->so_error = 0;
                               SOCKBUF_UNLOCK(&so->so_snd);
                               goto done;
                       }
                       if ((so->so_state & SS_ISCONNECTED) == 0) {
                               SOCKBUF_UNLOCK(&so->so_snd);
                               error = ENOTCONN;
                               goto done;
                       }
       
    24                 space = sbspace(&so->so_snd);
    24                 if (space < rem &&
     8                     (space <= 0 ||
                            space < so->so_snd.sb_lowat)) {
                               if (so->so_state & SS_NBIO) {
                                       SOCKBUF_UNLOCK(&so->so_snd);
                                       error = EAGAIN;
                                       goto done;
                               }
                               /*
                                * sbwait drops the lock while sleeping.
                                * When we loop back to retry_space the
                                * state may have changed and we retest
                                * for it.
                                */
                               error = sbwait(&so->so_snd);
                               /*
                                * An error from sbwait usually indicates that we've
                                * been interrupted by a signal. If we've sent anything
                                * then return bytes sent, otherwise return the error.
                                */
     2                         if (error != 0) {
                                       SOCKBUF_UNLOCK(&so->so_snd);
                                       goto done;
                               }
                               goto retry_space;
                       }
                       SOCKBUF_UNLOCK(&so->so_snd);
       
                       /*
                        * At the beginning of the first loop check if any headers
                        * are specified and copy them into mbufs.  Reduce space in
                        * the socket buffer by the size of the header mbuf chain.
                        * Clear hdr_uio here and hdrlen at the end of the first loop.
                        */
    19                 if (hdr_uio != NULL && hdr_uio->uio_resid > 0) {
                               hdr_uio->uio_td = td;
                               hdr_uio->uio_rw = UIO_WRITE;
       #ifdef KERN_TLS
                               if (tls != NULL)
                                       mh = m_uiotombuf(hdr_uio, M_WAITOK, space,
                                           tls->params.max_frame_len, M_EXTPG);
                               else
       #endif
     6                                 mh = m_uiotombuf(hdr_uio, M_WAITOK,
                                           space, 0, 0);
                               hdrlen = m_length(mh, &mhtail);
                               space -= hdrlen;
                               /*
                                * If header consumed all the socket buffer space,
                                * don't waste CPU cycles and jump to the end.
                                */
     6                         if (space == 0) {
                                       sfio = NULL;
                                       nios = 0;
                                       goto prepend_header;
                               }
                               hdr_uio = NULL;
                       }
       
                       /*
                        * Re-validate the file size under the vnode lock: it may
                        * have been truncated or extended since the last pass, so
                        * refresh obj_size and recompute the remaining byte count.
                        */
                       if (vp != NULL) {
                               error = vn_lock(vp, LK_SHARED);
                               if (error != 0)
                                       goto done;
                               error = VOP_GETATTR(vp, &va, td->td_ucred);
     1                         if (error != 0 || off >= va.va_size) {
                                       VOP_UNLOCK(vp);
                                       goto done;
                               }
    23                         if (va.va_size != obj_size) {
                                       obj_size = va.va_size;
                                       rem = nbytes ?
                                           omin(nbytes + offset, obj_size) : obj_size;
                                       rem -= off;
                               }
                       }
       
                       if (space > rem)
    16                         space = rem;
                       else if (space > PAGE_SIZE) {
                               /*
                                * Use page boundaries when possible for large
                                * requests.
                                */
                               if (off & PAGE_MASK)
     2                                 space -= (PAGE_SIZE - (off & PAGE_MASK));
                               space = trunc_page(space);
                               if (off & PAGE_MASK)
                                       space += (PAGE_SIZE - (off & PAGE_MASK));
                       }
       
                       npages = howmany(space + (off & PAGE_MASK), PAGE_SIZE);
       
                       /*
                        * Calculate maximum allowed number of pages for readahead
                        * at this iteration.  If SF_USER_READAHEAD was set, we don't
                        * do any heuristics and use exactly the value supplied by
                        * application.  Otherwise, we allow readahead up to "rem".
                        * If application wants more, let it be, but there is no
                        * reason to go above MAXPHYS.  Also check against "obj_size",
                        * since vm_pager_has_page() can hint beyond EOF.
                        */
     7                 if (flags & SF_USER_READAHEAD) {
                               rhpages = SF_READAHEAD(flags);
                       } else {
    16                         rhpages = howmany(rem + (off & PAGE_MASK), PAGE_SIZE) -
                                   npages;
                               rhpages += SF_READAHEAD(flags);
                       }
                       rhpages = min(howmany(MAXPHYS, PAGE_SIZE), rhpages);
                       rhpages = min(howmany(obj_size - trunc_page(off), PAGE_SIZE) -
                           npages, rhpages);
       
                       /*
                        * The sf_io tracks this batch of pages; it starts with one
                        * reference held by us, and sendfile_swapin() adds one per
                        * I/O it launches.  The trailing array holds the pages.
                        */
                       sfio = malloc(sizeof(struct sf_io) +
                           npages * sizeof(vm_page_t), M_SENDFILE, M_WAITOK);
                       refcount_init(&sfio->nios, 1);
                       sfio->obj = obj;
                       sfio->error = 0;
                       sfio->m = NULL;
       #ifdef KERN_TLS
                       /*
                        * This doesn't use ktls_hold() because sfio->m will
                        * also have a reference on 'tls' that will be valid
                        * for all of sfio's lifetime.
                        */
                       sfio->tls = tls;
       #endif
                       vm_object_pip_add(obj, 1);
                       error = sendfile_swapin(obj, sfio, &nios, off, space, npages,
                           rhpages, flags);
                       if (error != 0) {
                               if (vp != NULL)
                                       VOP_UNLOCK(vp);
                               sendfile_iodone(sfio, NULL, 0, error);
                               goto done;
                       }
       
                       /*
                        * Loop and construct maximum sized mbuf chain to be bulk
                        * dumped into socket buffer.
                        */
                       pa = sfio->pa;
       
                       /*
                        * Use unmapped mbufs if enabled for TCP.  Unmapped
                        * bufs are restricted to TCP as that is what has been
                        * tested.  In particular, unmapped mbufs have not
                        * been tested with UNIX-domain sockets.
                        *
                        * TLS frames always require unmapped mbufs.
                        */
    23                 if ((mb_use_ext_pgs &&
                           so->so_proto->pr_protocol == IPPROTO_TCP)
       #ifdef KERN_TLS
                           || tls != NULL
       #endif
                           ) {
                               use_ext_pgs = true;
       #ifdef KERN_TLS
                               if (tls != NULL)
                                       max_pgs = num_pages(tls->params.max_frame_len);
                               else
       #endif
                                       max_pgs = MBUF_PEXT_MAX_PGS;
       
                               /* Start at last index, to wrap on first use. */
                               ext_pgs_idx = max_pgs - 1;
                       }
       
    23                 for (int i = 0; i < npages; i++) {
                               /*
                                * If a page wasn't grabbed successfully, then
                                * trim the array. Can happen only with SF_NODISKIO.
                                */
                               if (pa[i] == NULL) {
                                       SFSTAT_INC(sf_busy);
                                       fixspace(npages, i, off, &space);
                                       npages = i;
                                       softerr = EBUSY;
                                       break;
                               }
                               pga = pa[i];
    23                         if (pga == bogus_page)
                                       pga = vm_page_relookup(obj, sfio->pindex0 + i);
       
                               if (use_ext_pgs) {
                                       off_t xfs;
       
                                       /*
                                        * Pack the page into the current M_EXTPG
                                        * mbuf, allocating a fresh one whenever the
                                        * previous one is full.
                                        */
                                       ext_pgs_idx++;
                                       if (ext_pgs_idx == max_pgs) {
                                               m0 = mb_alloc_ext_pgs(M_WAITOK,
                                                   sendfile_free_mext_pg);
       
                                               if (flags & SF_NOCACHE) {
                                                       m0->m_ext.ext_flags |=
                                                           EXT_FLAG_NOCACHE;
       
                                                       /*
                                                        * See comment below regarding
                                                        * ignoring SF_NOCACHE for the
                                                        * last page.
                                                        */
                                                       if ((npages - i <= max_pgs) &&
                                                           ((off + space) & PAGE_MASK) &&
                                                           (rem > space || rhpages > 0))
                                                               m0->m_ext.ext_flags |=
                                                                   EXT_FLAG_CACHE_LAST;
                                               }
                                               if (sfs != NULL) {
                                                       m0->m_ext.ext_flags |=
                                                           EXT_FLAG_SYNC;
                                                       m0->m_ext.ext_arg1 = sfs;
                                                       mtx_lock(&sfs->mtx);
                                                       sfs->count++;
                                                       mtx_unlock(&sfs->mtx);
                                               }
                                               ext_pgs_idx = 0;
       
                                               /* Append to mbuf chain. */
                                               if (mtail != NULL)
                                                       mtail->m_next = m0;
                                               else
                                                       m = m0;
                                               mtail = m0;
                                               m0->m_epg_1st_off =
                                                   vmoff(i, off) & PAGE_MASK;
                                       }
                                       if (nios) {
                                               mtail->m_flags |= M_NOTREADY;
                                               m0->m_epg_nrdy++;
                                       }
       
                                       m0->m_epg_pa[ext_pgs_idx] = VM_PAGE_TO_PHYS(pga);
                                       m0->m_epg_npgs++;
                                       xfs = xfsize(i, npages, off, space);
                                       m0->m_epg_last_len = xfs;
                                       MBUF_EXT_PGS_ASSERT_SANITY(m0);
                                       mtail->m_len += xfs;
                                       mtail->m_ext.ext_size += PAGE_SIZE;
                                       continue;
                               }
       
                               /*
                                * Get a sendfile buf.  When allocating the
                                * first buffer for mbuf chain, we usually
                                * wait as long as necessary, but this wait
                                * can be interrupted.  For consequent
                                * buffers, do not sleep, since several
                                * threads might exhaust the buffers and then
                                * deadlock.
                                */
                               sf = sf_buf_alloc(pga,
                                   m != NULL ? SFB_NOWAIT : SFB_CATCH);
                               if (sf == NULL) {
                                       SFSTAT_INC(sf_allocfail);
                                       sendfile_iowait(sfio, "sfnosf");
                                       for (int j = i; j < npages; j++)
                                               vm_page_unwire(pa[j], PQ_INACTIVE);
                                       if (m == NULL)
                                               softerr = ENOBUFS;
                                       fixspace(npages, i, off, &space);
                                       npages = i;
                                       break;
                               }
       
                               m0 = m_get(M_WAITOK, MT_DATA);
                               m0->m_ext.ext_buf = (char *)sf_buf_kva(sf);
                               m0->m_ext.ext_size = PAGE_SIZE;
                               m0->m_ext.ext_arg1 = sf;
                               m0->m_ext.ext_type = EXT_SFBUF;
                               m0->m_ext.ext_flags = EXT_FLAG_EMBREF;
                               m0->m_ext.ext_free = sendfile_free_mext;
                               /*
                                * SF_NOCACHE sets the page as being freed upon send.
                                * However, we ignore it for the last page in 'space',
                                * if the page is truncated, and we got more data to
                                * send (rem > space), or if we have readahead
                                * configured (rhpages > 0).
                                */
    18                         if ((flags & SF_NOCACHE) &&
                                   (i != npages - 1 ||
                                   !((off + space) & PAGE_MASK) ||
                                   !(rem > space || rhpages > 0)))
     5                                 m0->m_ext.ext_flags |= EXT_FLAG_NOCACHE;
    17                         if (sfs != NULL) {
     6                                 m0->m_ext.ext_flags |= EXT_FLAG_SYNC;
                                       m0->m_ext.ext_arg2 = sfs;
                                       mtx_lock(&sfs->mtx);
                                       sfs->count++;
                                       mtx_unlock(&sfs->mtx);
                               }
                               m0->m_ext.ext_count = 1;
                               m0->m_flags |= (M_EXT | M_RDONLY);
                               if (nios)
                                       m0->m_flags |= M_NOTREADY;
                               m0->m_data = (char *)sf_buf_kva(sf) +
                                   (vmoff(i, off) & PAGE_MASK);
                               m0->m_len = xfsize(i, npages, off, space);
       
                               /* Append to mbuf chain. */
    23                         if (mtail != NULL)
    12                                 mtail->m_next = m0;
                               else
                                       m = m0;
                               mtail = m0;
                       }
       
                       if (vp != NULL)
    23                         VOP_UNLOCK(vp);
       
                       /* Keep track of bytes processed. */
                       off += space;
                       rem -= space;
       
                       /*
                        * Prepend header, if any.  Save pointer to first mbuf
                        * with a page.
                        */
    23                 if (hdrlen) {
       prepend_header:
                               m0 = mhtail->m_next = m;
                               m = mh;
                               mh = NULL;
                       } else
                               m0 = m;
       
                       if (m == NULL) {
                               KASSERT(softerr, ("%s: m NULL, no error", __func__));
                               error = softerr;
                               sendfile_iodone(sfio, NULL, 0, 0);
                               goto done;
                       }
       
                       /* Add the buffer chain to the socket buffer. */
                       KASSERT(m_length(m, NULL) == space + hdrlen,
                           ("%s: mlen %u space %d hdrlen %d",
                           __func__, m_length(m, NULL), space, hdrlen));
       
                       CURVNET_SET(so->so_vnet);
       #ifdef KERN_TLS
    24                 if (tls != NULL)
                               ktls_frame(m, tls, &tls_enq_cnt, TLS_RLTYPE_APP);
       #endif
                       if (nios == 0) {
                               /*
                                * If sendfile_swapin() didn't initiate any I/Os,
                                * which happens if all data is cached in VM, or if
                                * the header consumed all socket buffer space and
                                * sfio is NULL, then we can send data right now
                                * without the PRUS_NOTREADY flag.
                                */
     1                         if (sfio != NULL)
    19                                 sendfile_iodone(sfio, NULL, 0, 0);
       #ifdef KERN_TLS
    20                         if (tls != NULL && tls->mode == TCP_TLS_MODE_SW) {
                                       error = (*so->so_proto->pr_usrreqs->pru_send)
                                           (so, PRUS_NOTREADY, m, NULL, NULL, td);
                                       soref(so);
                                       ktls_enqueue(m, so, tls_enq_cnt);
                               } else
       #endif
                                       error = (*so->so_proto->pr_usrreqs->pru_send)
                                           (so, 0, m, NULL, NULL, td);
                       } else {
                               /*
                                * I/O is still in flight: queue the chain not-ready
                                * and let sendfile_iodone() mark it ready as reads
                                * complete.  Dropping our own sfio reference last
                                * keeps sfio alive until here.
                                */
                               sfio->so = so;
                               sfio->m = m0;
                               sfio->npages = npages;
                               soref(so);
                               error = (*so->so_proto->pr_usrreqs->pru_send)
                                   (so, PRUS_NOTREADY, m, NULL, NULL, td);
                               sendfile_iodone(sfio, NULL, 0, 0);
                       }
    24                 CURVNET_RESTORE();
       
                       m = NULL;        /* pru_send always consumes */
                       if (error)
                               goto done;
                       sbytes += space + hdrlen;
                       if (hdrlen)
                               hdrlen = 0;
                       if (softerr) {
                               error = softerr;
                               goto done;
                       }
               }
       
               /*
                * Send trailers. Wimp out and use writev(2).
                */
    19         if (trl_uio != NULL) {
                       sbunlock(&so->so_snd);
                       error = kern_writev(td, sockfd, trl_uio);
     1                 if (error == 0)
     2                         sbytes += td->td_retval[0];
                       goto out;
               }
       
       done:
               /* Release the single-writer lock taken via sblock() above. */
               sbunlock(&so->so_snd);
       out:
               /*
                * If there was no error we have to clear td->td_retval[0]
                * because it may have been set by writev.
                */
     1         if (error == 0) {
                       td->td_retval[0] = 0;
               }
               if (sent != NULL) {
    31                 (*sent) = sbytes;
               }
    31         if (obj != NULL)
                       vm_object_deallocate(obj);
     2         if (so)
                       fdrop(sock_fp, td);
               if (m)
                       m_freem(m);
    31         if (mh)
                       m_freem(mh);
       
               /*
                * SF_SYNC: wait for all page-carrying mbufs to be freed.  If the
                * wait is interrupted, leave the structure for the last mbuf free
                * callback to destroy (sfs->waiting = false signals that).
                */
    27         if (sfs != NULL) {
                       mtx_lock(&sfs->mtx);
     1                 if (sfs->count != 0)
                               error = cv_wait_sig(&sfs->cv, &sfs->mtx);
     3                 if (sfs->count == 0) {
                               sendfile_sync_destroy(sfs);
                       } else {
                               sfs->waiting = false;
                               mtx_unlock(&sfs->mtx);
                       }
               }
       #ifdef KERN_TLS
    31         if (tls != NULL)
                       ktls_free(tls);
       #endif
       
               if (error == ERESTART)
                       error = EINTR;
       
               return (error);
       }
      
      static int
      sendfile(struct thread *td, struct sendfile_args *uap, int compat)
   52 {
              struct sf_hdtr hdtr;
              struct uio *hdr_uio, *trl_uio;
              struct file *fp;
              off_t sbytes;
              int error;
      
              /*
               * File offset must be positive.  If it goes beyond EOF
               * we send only the header/trailer and no payload data.
               */
    1         if (uap->offset < 0)
                      return (EINVAL);
      
              sbytes = 0;
              hdr_uio = trl_uio = NULL;
      
   27         if (uap->hdtr != NULL) {
                      error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
    2                 if (error != 0)
                              goto out;
    7                 if (hdtr.headers != NULL) {
                              error = copyinuio(hdtr.headers, hdtr.hdr_cnt,
                                  &hdr_uio);
    2                         if (error != 0)
                                      goto out;
      #ifdef COMPAT_FREEBSD4
                              /*
                               * In FreeBSD < 5.0 the nbytes to send also included
                               * the header.  If compat is specified subtract the
                               * header size from nbytes.
                               */
   13                         if (compat) {
                                      if (uap->nbytes > hdr_uio->uio_resid)
                                              uap->nbytes -= hdr_uio->uio_resid;
                                      else
                                              uap->nbytes = 0;
                              }
      #endif
                      }
    9                 if (hdtr.trailers != NULL) {
                              error = copyinuio(hdtr.trailers, hdtr.trl_cnt,
                                  &trl_uio);
   11                         if (error != 0)
                                      goto out;
                      }
              }
      
   45         AUDIT_ARG_FD(uap->fd);
      
              /*
               * sendfile(2) can start at any offset within a file so we require
               * CAP_READ+CAP_SEEK = CAP_PREAD.
               */
    5         if ((error = fget_read(td, uap->fd, &cap_pread_rights, &fp)) != 0)
                      goto out;
      
              error = fo_sendfile(fp, uap->s, hdr_uio, trl_uio, uap->offset,
                  uap->nbytes, &sbytes, uap->flags, td);
              fdrop(fp, td);
      
   28         if (uap->sbytes != NULL)
    6                 copyout(&sbytes, uap->sbytes, sizeof(off_t));
      
      out:
              free(hdr_uio, M_IOV);
              free(trl_uio, M_IOV);
              return (error);
      }
      
      /*
       * sendfile(2)
       * 
       * int sendfile(int fd, int s, off_t offset, size_t nbytes,
       *       struct sf_hdtr *hdtr, off_t *sbytes, int flags)
       * 
       * Send a file specified by 'fd' and starting at 'offset' to a socket
       * specified by 's'. Send only 'nbytes' of the file or until EOF if nbytes ==
       * 0.  Optionally add a header and/or trailer to the socket output.  If
       * specified, write the total number of bytes sent into *sbytes.
       */
      int
      sys_sendfile(struct thread *td, struct sendfile_args *uap)
   52 {
      
              return (sendfile(td, uap, 0));
      }
      
      #ifdef COMPAT_FREEBSD4
      int
      freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
      {
              struct sendfile_args args;
      
              args.fd = uap->fd;
              args.s = uap->s;
              args.offset = uap->offset;
              args.nbytes = uap->nbytes;
              args.hdtr = uap->hdtr;
              args.sbytes = uap->sbytes;
              args.flags = uap->flags;
      
              return (sendfile(td, &args, 1));
      }
      #endif /* COMPAT_FREEBSD4 */
      /*-
       * SPDX-License-Identifier: BSD-3-Clause
       *
       * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
       * All rights reserved.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       * 3. Neither the name of the project nor the names of its contributors
       *    may be used to endorse or promote products derived from this software
       *    without specific prior written permission.
       *
       * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
       * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
       * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
       * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
       * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
       * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
       * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
       * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
       * SUCH DAMAGE.
       *
       *        $KAME: icmp6.c,v 1.211 2001/04/04 05:56:20 itojun Exp $
       */
      
      /*-
       * Copyright (c) 1982, 1986, 1988, 1993
       *        The Regents of the University of California.  All rights reserved.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       * 3. Neither the name of the University nor the names of its contributors
       *    may be used to endorse or promote products derived from this software
       *    without specific prior written permission.
       *
       * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
       * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
       * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
       * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
       * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
       * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
       * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
       * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
       * SUCH DAMAGE.
       *
       *        @(#)ip_icmp.c        8.2 (Berkeley) 1/4/94
       */
      
      #include <sys/cdefs.h>
      __FBSDID("$FreeBSD$");
      
      #define        MBUF_PRIVATE        /* XXXRW: Optimisation tries to avoid M_EXT mbufs */
      
      #include "opt_inet.h"
      #include "opt_inet6.h"
      
      #include <sys/param.h>
      #include <sys/domain.h>
      #include <sys/jail.h>
      #include <sys/kernel.h>
      #include <sys/lock.h>
      #include <sys/malloc.h>
      #include <sys/mbuf.h>
      #include <sys/proc.h>
      #include <sys/protosw.h>
      #include <sys/signalvar.h>
      #include <sys/socket.h>
      #include <sys/socketvar.h>
      #include <sys/sx.h>
      #include <sys/syslog.h>
      #include <sys/systm.h>
      #include <sys/time.h>
      
      #include <net/if.h>
      #include <net/if_var.h>
      #include <net/if_dl.h>
      #include <net/if_llatbl.h>
      #include <net/if_types.h>
      #include <net/route.h>
      #include <net/route/route_ctl.h>
      #include <net/route/nhop.h>
      #include <net/vnet.h>
      
      #include <netinet/in.h>
      #include <netinet/in_pcb.h>
      #include <netinet/in_var.h>
      #include <netinet/ip6.h>
      #include <netinet/icmp6.h>
      #include <netinet/tcp_var.h>
      
      #include <netinet6/in6_fib.h>
      #include <netinet6/in6_ifattach.h>
      #include <netinet6/in6_pcb.h>
      #include <netinet6/ip6protosw.h>
      #include <netinet6/ip6_var.h>
      #include <netinet6/scope6_var.h>
      #include <netinet6/mld6_var.h>
      #include <netinet6/nd6.h>
      #include <netinet6/send.h>
      
      extern struct domain inet6domain;
      
      VNET_PCPUSTAT_DEFINE(struct icmp6stat, icmp6stat);
      VNET_PCPUSTAT_SYSINIT(icmp6stat);
      
      #ifdef VIMAGE
      VNET_PCPUSTAT_SYSUNINIT(icmp6stat);
      #endif /* VIMAGE */
      
      VNET_DECLARE(struct inpcbinfo, ripcbinfo);
      VNET_DECLARE(struct inpcbhead, ripcb);
      VNET_DECLARE(int, icmp6errppslim);
      VNET_DEFINE_STATIC(int, icmp6errpps_count) = 0;
      VNET_DEFINE_STATIC(struct timeval, icmp6errppslim_last);
      VNET_DECLARE(int, icmp6_nodeinfo);
      
      #define        V_ripcbinfo                        VNET(ripcbinfo)
      #define        V_ripcb                                VNET(ripcb)
      #define        V_icmp6errppslim                VNET(icmp6errppslim)
      #define        V_icmp6errpps_count                VNET(icmp6errpps_count)
      #define        V_icmp6errppslim_last                VNET(icmp6errppslim_last)
      #define        V_icmp6_nodeinfo                VNET(icmp6_nodeinfo)
      
      static void icmp6_errcount(int, int);
      static int icmp6_rip6_input(struct mbuf **, int);
      static void icmp6_reflect(struct mbuf *, size_t);
      static int icmp6_ratelimit(const struct in6_addr *, const int, const int);
      static const char *icmp6_redirect_diag(struct in6_addr *,
              struct in6_addr *, struct in6_addr *);
      static struct mbuf *ni6_input(struct mbuf *, int, struct prison *);
      static struct mbuf *ni6_nametodns(const char *, int, int);
      static int ni6_dnsmatch(const char *, int, const char *, int);
      static int ni6_addrs(struct icmp6_nodeinfo *, struct mbuf *,
                                struct ifnet **, struct in6_addr *);
      static int ni6_store_addrs(struct icmp6_nodeinfo *, struct icmp6_nodeinfo *,
                                      struct ifnet *, int);
      static int icmp6_notify_error(struct mbuf **, int, int, int);
      
      /*
       * Kernel module interface for updating icmp6stat.  The argument is an index
       * into icmp6stat treated as an array of u_quad_t.  While this encodes the
       * general layout of icmp6stat into the caller, it doesn't encode its
       * location, so that future changes to add, for example, per-CPU stats
       * support won't cause binary compatibility problems for kernel modules.
       */
/*
 * Increment one icmp6stat counter on behalf of a kernel module.
 * statnum indexes the current VNET's icmp6stat viewed as an array of
 * u_quad_t counters (see the layout note above).
 */
void
kmod_icmp6stat_inc(int statnum)
{

	/* Per-CPU counter update; no locking required. */
	counter_u64_add(VNET(icmp6stat)[statnum], 1);
}
      
      static void
      icmp6_errcount(int type, int code)
   70 {
              switch (type) {
              case ICMP6_DST_UNREACH:
                      switch (code) {
                      case ICMP6_DST_UNREACH_NOROUTE:
                              ICMP6STAT_INC(icp6s_odst_unreach_noroute);
                              return;
                      case ICMP6_DST_UNREACH_ADMIN:
                              ICMP6STAT_INC(icp6s_odst_unreach_admin);
                              return;
                      case ICMP6_DST_UNREACH_BEYONDSCOPE:
                              ICMP6STAT_INC(icp6s_odst_unreach_beyondscope);
                              return;
                      case ICMP6_DST_UNREACH_ADDR:
                              ICMP6STAT_INC(icp6s_odst_unreach_addr);
                              return;
                      case ICMP6_DST_UNREACH_NOPORT:
                              ICMP6STAT_INC(icp6s_odst_unreach_noport);
                              return;
                      }
                      break;
              case ICMP6_PACKET_TOO_BIG:
                      ICMP6STAT_INC(icp6s_opacket_too_big);
                      return;
              case ICMP6_TIME_EXCEEDED:
                      switch (code) {
                      case ICMP6_TIME_EXCEED_TRANSIT:
                              ICMP6STAT_INC(icp6s_otime_exceed_transit);
                              return;
                      case ICMP6_TIME_EXCEED_REASSEMBLY:
                              ICMP6STAT_INC(icp6s_otime_exceed_reassembly);
                              return;
                      }
                      break;
              case ICMP6_PARAM_PROB:
                      switch (code) {
                      case ICMP6_PARAMPROB_HEADER:
                              ICMP6STAT_INC(icp6s_oparamprob_header);
                              return;
                      case ICMP6_PARAMPROB_NEXTHEADER:
                              ICMP6STAT_INC(icp6s_oparamprob_nextheader);
                              return;
                      case ICMP6_PARAMPROB_OPTION:
                              ICMP6STAT_INC(icp6s_oparamprob_option);
                              return;
                      }
                      break;
              case ND_REDIRECT:
                      ICMP6STAT_INC(icp6s_oredirect);
                      return;
              }
              ICMP6STAT_INC(icp6s_ounknown);
      }
      
      /*
       * A wrapper function for icmp6_error() necessary when the erroneous packet
       * may not contain enough scope zone information.
       */
      void
      icmp6_error2(struct mbuf *m, int type, int code, int param,
          struct ifnet *ifp)
      {
              struct ip6_hdr *ip6;
      
              if (ifp == NULL)
                      return;
      
              if (m->m_len < sizeof(struct ip6_hdr)) {
                      m = m_pullup(m, sizeof(struct ip6_hdr));
                      if (m == NULL) {
                              IP6STAT_INC(ip6s_exthdrtoolong);
                              return;
                      }
              }
              ip6 = mtod(m, struct ip6_hdr *);
      
              if (in6_setscope(&ip6->ip6_src, ifp, NULL) != 0)
                      return;
              if (in6_setscope(&ip6->ip6_dst, ifp, NULL) != 0)
                      return;
      
              icmp6_error(m, type, code, param);
      }
      
      /*
       * Generate an error packet of type error in response to bad IP6 packet.
       */
/*
 * NOTE(doc): 'm' is consumed on every path: it is either handed to
 * icmp6_reflect() for transmission, freed at "freeit", or freed
 * implicitly by a failed m_pullup()/M_PREPEND().  Callers must not
 * touch 'm' after this call.
 */
void
icmp6_error(struct mbuf *m, int type, int code, int param)
{
	struct ip6_hdr *oip6, *nip6;
	struct icmp6_hdr *icmp6;
	u_int preplen;
	int off;
	int nxt;

	ICMP6STAT_INC(icp6s_error);

	/* count per-type-code statistics */
	icmp6_errcount(type, code);

#ifdef M_DECRYPTED	/*not openbsd*/
	/* Do not generate errors for packets that arrived encrypted. */
	if (m->m_flags & M_DECRYPTED) {
		ICMP6STAT_INC(icp6s_canterror);
		goto freeit;
	}
#endif

	/* Make the IPv6 header contiguous; m_pullup() frees m on failure. */
	if (m->m_len < sizeof(struct ip6_hdr)) {
		m = m_pullup(m, sizeof(struct ip6_hdr));
		if (m == NULL) {
			IP6STAT_INC(ip6s_exthdrtoolong);
			return;
		}
	}
	oip6 = mtod(m, struct ip6_hdr *);

	/*
	 * If the destination address of the erroneous packet is a multicast
	 * address, or the packet was sent using link-layer multicast,
	 * we should basically suppress sending an error (RFC 2463, Section
	 * 2.4).
	 * We have two exceptions (the item e.2 in that section):
	 * - the Packet Too Big message can be sent for path MTU discovery.
	 * - the Parameter Problem Message that can be allowed an icmp6 error
	 *   in the option type field.  This check has been done in
	 *   ip6_unknown_opt(), so we can just check the type and code.
	 */
	if ((m->m_flags & (M_BCAST|M_MCAST) ||
	     IN6_IS_ADDR_MULTICAST(&oip6->ip6_dst)) &&
	    (type != ICMP6_PACKET_TOO_BIG &&
	     (type != ICMP6_PARAM_PROB ||
	      code != ICMP6_PARAMPROB_OPTION)))
		goto freeit;

	/*
	 * RFC 2463, 2.4 (e.5): source address check.
	 * XXX: the case of anycast source?
	 */
	if (IN6_IS_ADDR_UNSPECIFIED(&oip6->ip6_src) ||
	    IN6_IS_ADDR_MULTICAST(&oip6->ip6_src))
		goto freeit;

	/*
	 * If we are about to send ICMPv6 against ICMPv6 error/redirect,
	 * don't do it.
	 */
	nxt = -1;
	/* Walk the extension-header chain to find the last protocol. */
	off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt);
	if (off >= 0 && nxt == IPPROTO_ICMPV6) {
		struct icmp6_hdr *icp;

		if (m->m_len < off + sizeof(struct icmp6_hdr)) {
			m = m_pullup(m, off + sizeof(struct icmp6_hdr));
			if (m == NULL) {
				IP6STAT_INC(ip6s_exthdrtoolong);
				return;
			}
		}
		/* m may have been reallocated by m_pullup(); re-fetch. */
		oip6 = mtod(m, struct ip6_hdr *);
		icp = (struct icmp6_hdr *)(mtod(m, caddr_t) + off);

		/* Types below ICMP6_ECHO_REQUEST (128) are errors. */
		if (icp->icmp6_type < ICMP6_ECHO_REQUEST ||
		    icp->icmp6_type == ND_REDIRECT) {
			/*
			 * ICMPv6 error
			 * Special case: for redirect (which is
			 * informational) we must not send icmp6 error.
			 */
			ICMP6STAT_INC(icp6s_canterror);
			goto freeit;
		} else {
			/* ICMPv6 informational - send the error */
		}
	} else {
		/* non-ICMPv6 - send the error */
	}

	/* Finally, do rate limitation check. */
	if (icmp6_ratelimit(&oip6->ip6_src, type, code)) {
		ICMP6STAT_INC(icp6s_toofreq);
		goto freeit;
	}

	/*
	 * OK, ICMP6 can be generated.
	 */

	/*
	 * Truncate the quoted packet to ICMPV6_PLD_MAXLEN; the negative
	 * m_adj() count trims the excess from the tail of the chain.
	 */
	if (m->m_pkthdr.len >= ICMPV6_PLD_MAXLEN)
		m_adj(m, ICMPV6_PLD_MAXLEN - m->m_pkthdr.len);

	/* Prepend room for the new outer IPv6 + ICMPv6 headers. */
	preplen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
	M_PREPEND(m, preplen, M_NOWAIT);	/* FIB is also copied over. */
	if (m == NULL) {
		/* M_PREPEND() freed the chain on allocation failure. */
		nd6log((LOG_DEBUG, "ENOBUFS in icmp6_error %d\n", __LINE__));
		return;
	}

	/*
	 * Copy the offending packet's addresses into the new outer header;
	 * icmp6_reflect() will swap them to address the sender.
	 */
	nip6 = mtod(m, struct ip6_hdr *);
	nip6->ip6_src  = oip6->ip6_src;
	nip6->ip6_dst  = oip6->ip6_dst;

	/* Scope zone IDs must not leak into the quoted (inner) packet. */
	in6_clearscope(&oip6->ip6_src);
	in6_clearscope(&oip6->ip6_dst);

	/* Fill in the ICMPv6 error header following the outer IPv6 header. */
	icmp6 = (struct icmp6_hdr *)(nip6 + 1);
	icmp6->icmp6_type = type;
	icmp6->icmp6_code = code;
	icmp6->icmp6_pptr = htonl((u_int32_t)param);

	ICMP6STAT_INC(icp6s_outhist[type]);
	icmp6_reflect(m, sizeof(struct ip6_hdr)); /* header order: IPv6 - ICMPv6 */

	return;

  freeit:
	/*
	 * If we can't tell whether or not we can generate ICMP6, free it.
	 */
	m_freem(m);
}
      
      /*
       * Process a received ICMP6 message.
       */
      int
      icmp6_input(struct mbuf **mp, int *offp, int proto)
  183 {
              struct mbuf *m, *n;
              struct ifnet *ifp;
              struct ip6_hdr *ip6, *nip6;
              struct icmp6_hdr *icmp6, *nicmp6;
              char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
              int code, error, icmp6len, ip6len, noff, off, sum;
      
              NET_EPOCH_ASSERT();
      
              m = *mp;
              off = *offp;
      
  182         if (m->m_len < off + sizeof(struct icmp6_hdr)) {
                      m = m_pullup(m, off + sizeof(struct icmp6_hdr));
                      if (m == NULL) {
                              IP6STAT_INC(ip6s_exthdrtoolong);
                              *mp = m;
                              return (IPPROTO_DONE);
                      }
              }
      
              /*
               * Locate icmp6 structure in mbuf, and check
               * that not corrupted and of at least minimum length
               */
      
              icmp6len = m->m_pkthdr.len - off;
              if (icmp6len < sizeof(struct icmp6_hdr)) {
                      ICMP6STAT_INC(icp6s_tooshort);
                      goto freeit;
              }
      
              ip6 = mtod(m, struct ip6_hdr *);
              ifp = m->m_pkthdr.rcvif;
              /*
               * Check multicast group membership.
               * Note: SSM filters are not applied for ICMPv6 traffic.
               */
   22         if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
                      struct in6_multi        *inm;
      
                      inm = in6m_lookup(ifp, &ip6->ip6_dst);
                      if (inm == NULL) {
                              IP6STAT_INC(ip6s_notmember);
                              in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
                              goto freeit;
                      }
              }
      
              /* Calculate the checksum. */
              icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off);
              code = icmp6->icmp6_code;
              if ((sum = in6_cksum(m, IPPROTO_ICMPV6, off, icmp6len)) != 0) {
    1                 nd6log((LOG_ERR,
                          "ICMP6 checksum error(%d|%x) %s\n",
                          icmp6->icmp6_type, sum,
                          ip6_sprintf(ip6bufs, &ip6->ip6_src)));
                      ICMP6STAT_INC(icp6s_checksum);
                      goto freeit;
              }
      
              ICMP6STAT_INC(icp6s_inhist[icmp6->icmp6_type]);
              icmp6_ifstat_inc(ifp, ifs6_in_msg);
   97         if (icmp6->icmp6_type < ICMP6_INFOMSG_MASK)
                      icmp6_ifstat_inc(ifp, ifs6_in_error);
      
              ip6len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen);
    8         switch (icmp6->icmp6_type) {
              case ICMP6_DST_UNREACH:
                      icmp6_ifstat_inc(ifp, ifs6_in_dstunreach);
    7                 switch (code) {
                      case ICMP6_DST_UNREACH_NOROUTE:
                      case ICMP6_DST_UNREACH_ADDR:        /* PRC_HOSTDEAD is a DOS */
                              code = PRC_UNREACH_NET;
                              break;
                      case ICMP6_DST_UNREACH_ADMIN:
                              icmp6_ifstat_inc(ifp, ifs6_in_adminprohib);
                              code = PRC_UNREACH_ADMIN_PROHIB;
                              break;
                      case ICMP6_DST_UNREACH_BEYONDSCOPE:
                              /* I mean "source address was incorrect." */
                              code = PRC_PARAMPROB;
                              break;
                      case ICMP6_DST_UNREACH_NOPORT:
                              code = PRC_UNREACH_PORT;
                              break;
                      default:
                              goto badcode;
                      }
                      goto deliver;
                      break;
      
              case ICMP6_PACKET_TOO_BIG:
                      icmp6_ifstat_inc(ifp, ifs6_in_pkttoobig);
      
                      /* validation is made in icmp6_mtudisc_update */
      
                      code = PRC_MSGSIZE;
      
                      /*
                       * Updating the path MTU will be done after examining
                       * intermediate extension headers.
                       */
                      goto deliver;
                      break;
      
              case ICMP6_TIME_EXCEEDED:
                      icmp6_ifstat_inc(ifp, ifs6_in_timeexceed);
   11                 switch (code) {
                      case ICMP6_TIME_EXCEED_TRANSIT:
                              code = PRC_TIMXCEED_INTRANS;
                              break;
                      case ICMP6_TIME_EXCEED_REASSEMBLY:
                              code = PRC_TIMXCEED_REASS;
                              break;
                      default:
                              goto badcode;
                      }
                      goto deliver;
                      break;
      
              case ICMP6_PARAM_PROB:
                      icmp6_ifstat_inc(ifp, ifs6_in_paramprob);
   20                 switch (code) {
                      case ICMP6_PARAMPROB_NEXTHEADER:
                              code = PRC_UNREACH_PROTOCOL;
                              break;
                      case ICMP6_PARAMPROB_HEADER:
                      case ICMP6_PARAMPROB_OPTION:
                              code = PRC_PARAMPROB;
                              break;
                      default:
                              goto badcode;
                      }
                      goto deliver;
                      break;
      
              case ICMP6_ECHO_REQUEST:
                      icmp6_ifstat_inc(ifp, ifs6_in_echo);
    1                 if (code != 0)
                              goto badcode;
                      if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) {
                              /* Give up remote */
                              break;
                      }
    3                 if (!M_WRITABLE(n)
    1                  || n->m_len < off + sizeof(struct icmp6_hdr)) {
                              struct mbuf *n0 = n;
                              int n0len;
      
                              CTASSERT(sizeof(*nip6) + sizeof(*nicmp6) <= MHLEN);
                              n = m_gethdr(M_NOWAIT, n0->m_type);
                              if (n == NULL) {
                                      /* Give up remote */
                                      m_freem(n0);
                                      break;
                              }
      
    1                         m_move_pkthdr(n, n0);        /* FIB copied. */
                              n0len = n0->m_pkthdr.len;        /* save for use below */
                              /*
                               * Copy IPv6 and ICMPv6 only.
                               */
                              nip6 = mtod(n, struct ip6_hdr *);
                              bcopy(ip6, nip6, sizeof(struct ip6_hdr));
                              nicmp6 = (struct icmp6_hdr *)(nip6 + 1);
                              bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr));
                              noff = sizeof(struct ip6_hdr);
                              /* new mbuf contains only ipv6+icmpv6 headers */
                              n->m_len = noff + sizeof(struct icmp6_hdr);
                              /*
                               * Adjust mbuf.  ip6_plen will be adjusted in
                               * ip6_output().
                               */
                              m_adj(n0, off + sizeof(struct icmp6_hdr));
                              /* recalculate complete packet size */
                              n->m_pkthdr.len = n0len + (noff - off);
                              n->m_next = n0;
                      } else {
                              if (n->m_len < off + sizeof(*nicmp6)) {
                                      n = m_pullup(n, off + sizeof(*nicmp6));
                                      if (n == NULL) {
                                              IP6STAT_INC(ip6s_exthdrtoolong);
                                              break;
                                      }
                              }
                              nicmp6 = (struct icmp6_hdr *)(mtod(n, caddr_t) + off);
                              noff = off;
                      }
                      if (n) {
                              nicmp6->icmp6_type = ICMP6_ECHO_REPLY;
                              nicmp6->icmp6_code = 0;
                              ICMP6STAT_INC(icp6s_reflect);
                              ICMP6STAT_INC(icp6s_outhist[ICMP6_ECHO_REPLY]);
                              icmp6_reflect(n, noff);
                      }
                      break;
      
              case ICMP6_ECHO_REPLY:
                      icmp6_ifstat_inc(ifp, ifs6_in_echoreply);
    2                 if (code != 0)
                              goto badcode;
                      break;
      
              case MLD_LISTENER_QUERY:
              case MLD_LISTENER_REPORT:
              case MLD_LISTENER_DONE:
              case MLDV2_LISTENER_REPORT:
                      /*
                       * Drop MLD traffic which is not link-local, has a hop limit
                       * of greater than 1 hop, or which does not have the
                       * IPv6 HBH Router Alert option.
                       * As IPv6 HBH options are stripped in ip6_input() we must
                       * check an mbuf header flag.
                       * XXX Should we also sanity check that these messages
                       * were directed to a link-local multicast prefix?
                       */
    8                 if ((ip6->ip6_hlim != 1) || (m->m_flags & M_RTALERT_MLD) == 0)
                              goto freeit;
                      if (mld_input(&m, off, icmp6len) != 0) {
                              *mp = NULL;
                              return (IPPROTO_DONE);
                      }
                      /* m stays. */
                      break;
      
              case ICMP6_WRUREQUEST:        /* ICMP6_FQDN_QUERY */
                  {
                      enum { WRU, FQDN } mode;
                      struct prison *pr;
      
                      if (!V_icmp6_nodeinfo)
                              break;
      
                      if (icmp6len == sizeof(struct icmp6_hdr) + 4)
                              mode = WRU;
   21                 else if (icmp6len >= sizeof(struct icmp6_nodeinfo))
                              mode = FQDN;
                      else
                              goto badlen;
      
                      pr = NULL;
                      sx_slock(&allprison_lock);
   21                 TAILQ_FOREACH(pr, &allprison, pr_list)
                              if (pr->pr_vnet == ifp->if_vnet)
                                      break; 
                      sx_sunlock(&allprison_lock);
                      if (pr == NULL)
                              pr = curthread->td_ucred->cr_prison;
                      if (mode == FQDN) {
   21                         if (m->m_len < off + sizeof(struct icmp6_nodeinfo)) {
                                      m = m_pullup(m, off +
                                          sizeof(struct icmp6_nodeinfo));
                                      if (m == NULL) {
                                              IP6STAT_INC(ip6s_exthdrtoolong);
                                              *mp = m;
                                              return (IPPROTO_DONE);
                                      }
                              }
                              n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
                              if (n)
                                      n = ni6_input(n, off, pr);
                              /* XXX meaningless if n == NULL */
                              noff = sizeof(struct ip6_hdr);
                      } else {
                              u_char *p;
                              int maxhlen, hlen;
      
                              /*
                               * XXX: this combination of flags is pointless,
                               * but should we keep this for compatibility?
                               */
                              if ((V_icmp6_nodeinfo & (ICMP6_NODEINFO_FQDNOK |
                                  ICMP6_NODEINFO_TMPADDROK)) !=
                                  (ICMP6_NODEINFO_FQDNOK | ICMP6_NODEINFO_TMPADDROK))
                                      break;
      
                              if (code != 0)
                                      goto badcode;
      
                              CTASSERT(sizeof(*nip6) + sizeof(*nicmp6) + 4 <= MHLEN);
                              n = m_gethdr(M_NOWAIT, m->m_type);
                              if (n == NULL) {
                                      /* Give up remote */
                                      break;
                              }
                              if (!m_dup_pkthdr(n, m, M_NOWAIT)) {
                                      /*
                                       * Previous code did a blind M_COPY_PKTHDR
                                       * and said "just for rcvif".  If true, then
                                       * we could tolerate the dup failing (due to
                                       * the deep copy of the tag chain).  For now
                                       * be conservative and just fail.
                                       */
                                      m_free(n);
                                      n = NULL;
                                      break;
                              }
                              /*
                               * Copy IPv6 and ICMPv6 only.
                               */
                              nip6 = mtod(n, struct ip6_hdr *);
                              bcopy(ip6, nip6, sizeof(struct ip6_hdr));
                              nicmp6 = (struct icmp6_hdr *)(nip6 + 1);
                              bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr));
                              p = (u_char *)(nicmp6 + 1);
                              bzero(p, 4);
      
                              maxhlen = M_TRAILINGSPACE(n) -
                                  (sizeof(*nip6) + sizeof(*nicmp6) + 4);
                              mtx_lock(&pr->pr_mtx);
                              hlen = strlen(pr->pr_hostname);
                              if (maxhlen > hlen)
                                      maxhlen = hlen;
                              /* meaningless TTL */
                              bcopy(pr->pr_hostname, p + 4, maxhlen);
                              mtx_unlock(&pr->pr_mtx);
                              noff = sizeof(struct ip6_hdr);
                              n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) +
                                      sizeof(struct icmp6_hdr) + 4 + maxhlen;
                              nicmp6->icmp6_type = ICMP6_WRUREPLY;
                              nicmp6->icmp6_code = 0;
                      }
                      if (n) {
                              ICMP6STAT_INC(icp6s_reflect);
                              ICMP6STAT_INC(icp6s_outhist[ICMP6_WRUREPLY]);
                              icmp6_reflect(n, noff);
                      }
                      break;
                  }
      
              case ICMP6_WRUREPLY:
    2                 if (code != 0)
                              goto badcode;
                      break;
      
              case ND_ROUTER_SOLICIT:
                      icmp6_ifstat_inc(ifp, ifs6_in_routersolicit);
    1                 if (code != 0)
                              goto badcode;
                      if (icmp6len < sizeof(struct nd_router_solicit))
                              goto badlen;
    1                 if (send_sendso_input_hook != NULL) {
                              if (m->m_len < off + icmp6len) {
                                      m = m_pullup(m, off + icmp6len);
                                      if (m == NULL) {
                                              IP6STAT_INC(ip6s_exthdrtoolong);
                                              *mp = NULL;
                                              return (IPPROTO_DONE);
                                      }
                              }
                              error = send_sendso_input_hook(m, ifp, SND_IN, ip6len);
                              if (error == 0) {
                                      m = NULL;
                                      goto freeit;
                              }
                      }
                      n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
                      nd6_rs_input(m, off, icmp6len);
                      m = n;
    1                 if (m == NULL)
                              goto freeit;
                      break;
      
              case ND_ROUTER_ADVERT:
                      icmp6_ifstat_inc(ifp, ifs6_in_routeradvert);
                      if (code != 0)
                              goto badcode;
                      if (icmp6len < sizeof(struct nd_router_advert))
                              goto badlen;
    2                 if (send_sendso_input_hook != NULL) {
                              error = send_sendso_input_hook(m, ifp, SND_IN, ip6len);
                              if (error == 0) {
                                      m = NULL;
                                      goto freeit;
                              }
                      }
                      n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
                      nd6_ra_input(m, off, icmp6len);
                      m = n;
    2                 if (m == NULL)
                              goto freeit;
                      break;
      
              case ND_NEIGHBOR_SOLICIT:
                      icmp6_ifstat_inc(ifp, ifs6_in_neighborsolicit);
    1                 if (code != 0)
                              goto badcode;
    1                 if (icmp6len < sizeof(struct nd_neighbor_solicit))
                              goto badlen;
   15                 if (send_sendso_input_hook != NULL) {
                              error = send_sendso_input_hook(m, ifp, SND_IN, ip6len);
                              if (error == 0) {
                                      m = NULL;
                                      goto freeit;
                              }
                      }
                      n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
                      nd6_ns_input(m, off, icmp6len);
                      m = n;
   15                 if (m == NULL)
                              goto freeit;
                      break;
      
              case ND_NEIGHBOR_ADVERT:
                      icmp6_ifstat_inc(ifp, ifs6_in_neighboradvert);
    1                 if (code != 0)
                              goto badcode;
                      if (icmp6len < sizeof(struct nd_neighbor_advert))
                              goto badlen;
   23                 if (send_sendso_input_hook != NULL) {
                              error = send_sendso_input_hook(m, ifp, SND_IN, ip6len);
                              if (error == 0) {
                                      m = NULL;
                                      goto freeit;
                              }
                      }
                      n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
                      nd6_na_input(m, off, icmp6len);
                      m = n;
   23                 if (m == NULL)
                              goto freeit;
                      break;
      
              case ND_REDIRECT:
                      icmp6_ifstat_inc(ifp, ifs6_in_redirect);
    1                 if (code != 0)
                              goto badcode;
    1                 if (icmp6len < sizeof(struct nd_redirect))
                              goto badlen;
    9                 if (send_sendso_input_hook != NULL) {
                              error = send_sendso_input_hook(m, ifp, SND_IN, ip6len);
                              if (error == 0) {
                                      m = NULL;
                                      goto freeit;
                              }
                      }
                      n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
                      icmp6_redirect_input(m, off);
                      m = n;
    9                 if (m == NULL)
                              goto freeit;
                      break;
      
              case ICMP6_ROUTER_RENUMBERING:
    1                 if (code != ICMP6_ROUTER_RENUMBERING_COMMAND &&
                          code != ICMP6_ROUTER_RENUMBERING_RESULT)
                              goto badcode;
    1                 if (icmp6len < sizeof(struct icmp6_router_renum))
                              goto badlen;
                      break;
      
              default:
   24                 nd6log((LOG_DEBUG,
                          "icmp6_input: unknown type %d(src=%s, dst=%s, ifid=%d)\n",
                          icmp6->icmp6_type, ip6_sprintf(ip6bufs, &ip6->ip6_src),
                          ip6_sprintf(ip6bufd, &ip6->ip6_dst),
                          ifp ? ifp->if_index : 0));
    1                 if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST) {
                              /* ICMPv6 error: MUST deliver it by spec... */
                              code = PRC_NCMDS;
                              /* deliver */
                      } else {
                              /* ICMPv6 informational: MUST not deliver */
                              break;
                      }
              deliver:
   14                 if (icmp6_notify_error(&m, off, icmp6len, code) != 0) {
                              /* In this case, m should've been freed. */
                              *mp = NULL;
                              return (IPPROTO_DONE);
                      }
                      break;
      
              badcode:
                      ICMP6STAT_INC(icp6s_badcode);
                      break;
      
              badlen:
                      ICMP6STAT_INC(icp6s_badlen);
                      break;
              }
      
              /* deliver the packet to appropriate sockets */
              icmp6_rip6_input(&m, *offp);
      
              *mp = m;
              return (IPPROTO_DONE);
      
       freeit:
              m_freem(m);
              *mp = NULL;
              return (IPPROTO_DONE);
      }
      
/*
 * Process an incoming ICMPv6 error message: validate that the error quotes
 * enough of the offending (inner) packet, walk the inner packet's extension
 * header chain to determine the final destination address and the upper-layer
 * protocol, and then deliver the error to that protocol's *ctlinput routine.
 * For Packet Too Big errors the path MTU cache is updated first.
 *
 * On success, returns 0 with *mp pointing at the (possibly re-pulled-up)
 * mbuf chain.  On failure, the mbuf has been freed (or lost by m_pullup()),
 * *mp is set to NULL, and -1 is returned.
 */
static int
icmp6_notify_error(struct mbuf **mp, int off, int icmp6len, int code)
{
	struct mbuf *m;
	struct icmp6_hdr *icmp6;
	struct ip6_hdr *eip6;		/* inner (encapsulated) IPv6 header */
	u_int32_t notifymtu;
	struct sockaddr_in6 icmp6src, icmp6dst;

	m = *mp;

	/* The error must quote at least the full inner IPv6 header. */
	if (icmp6len < sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr)) {
		ICMP6STAT_INC(icp6s_tooshort);
		goto freeit;
	}

	/* Make the ICMPv6 header plus the inner IPv6 header contiguous. */
	if (m->m_len < off + sizeof(*icmp6) + sizeof(struct ip6_hdr)) {
		m = m_pullup(m, off + sizeof(*icmp6) + sizeof(struct ip6_hdr));
		if (m == NULL) {
			/* m_pullup() freed the chain on failure. */
			IP6STAT_INC(ip6s_exthdrtoolong);
			*mp = m;
			return (-1);
		}
	}
	icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off);
	eip6 = (struct ip6_hdr *)(icmp6 + 1);

	/* Detect the upper level protocol */
	{
		void (*ctlfunc)(int, struct sockaddr *, void *);
		u_int8_t nxt = eip6->ip6_nxt;
		/* eoff tracks the running offset of the current inner header. */
		int eoff = off + sizeof(struct icmp6_hdr) +
		    sizeof(struct ip6_hdr);
		struct ip6ctlparam ip6cp;
		struct in6_addr *finaldst = NULL;
		int icmp6type = icmp6->icmp6_type;
		struct ip6_frag *fh;
		struct ip6_rthdr *rth;
		struct ip6_rthdr0 *rth0;
		int rthlen;

		while (1) { /* XXX: should avoid infinite loop explicitly? */
			struct ip6_ext *eh;

			switch (nxt) {
			case IPPROTO_HOPOPTS:
			case IPPROTO_DSTOPTS:
			case IPPROTO_AH:
				if (m->m_len < eoff + sizeof(struct ip6_ext)) {
					m = m_pullup(m, eoff +
					    sizeof(struct ip6_ext));
					if (m == NULL) {
						IP6STAT_INC(ip6s_exthdrtoolong);
						*mp = m;
						return (-1);
					}
				}
				eh = (struct ip6_ext *)
				    (mtod(m, caddr_t) + eoff);
				/*
				 * AH expresses its length in 32-bit words
				 * minus 2; other extension headers use
				 * 64-bit units minus 1.
				 */
				if (nxt == IPPROTO_AH)
					eoff += (eh->ip6e_len + 2) << 2;
				else
					eoff += (eh->ip6e_len + 1) << 3;
				nxt = eh->ip6e_nxt;
				break;
			case IPPROTO_ROUTING:
				/*
				 * When the erroneous packet contains a
				 * routing header, we should examine the
				 * header to determine the final destination.
				 * Otherwise, we can't properly update
				 * information that depends on the final
				 * destination (e.g. path MTU).
				 */
				if (m->m_len < eoff + sizeof(*rth)) {
					m = m_pullup(m, eoff + sizeof(*rth));
					if (m == NULL) {
						IP6STAT_INC(ip6s_exthdrtoolong);
						*mp = m;
						return (-1);
					}
				}
				rth = (struct ip6_rthdr *)
				    (mtod(m, caddr_t) + eoff);
				rthlen = (rth->ip6r_len + 1) << 3;
				/*
				 * XXX: currently there is no
				 * officially defined type other
				 * than type-0.
				 * Note that if the segment left field
				 * is 0, all intermediate hops must
				 * have been passed.
				 */
				if (rth->ip6r_segleft &&
				    rth->ip6r_type == IPV6_RTHDR_TYPE_0) {
					int hops;

					if (m->m_len < eoff + rthlen) {
						m = m_pullup(m, eoff + rthlen);
						if (m == NULL) {
							IP6STAT_INC(
							    ip6s_exthdrtoolong);
							*mp = m;
							return (-1);
						}
					}
					rth0 = (struct ip6_rthdr0 *)
					    (mtod(m, caddr_t) + eoff);
					/* just ignore a bogus header */
					if ((rth0->ip6r0_len % 2) == 0 &&
					    (hops = rth0->ip6r0_len/2))
						finaldst = (struct in6_addr *)(rth0 + 1) + (hops - 1);
				}
				eoff += rthlen;
				nxt = rth->ip6r_nxt;
				break;
			case IPPROTO_FRAGMENT:
				if (m->m_len < eoff + sizeof(struct ip6_frag)) {
					m = m_pullup(m, eoff +
					    sizeof(struct ip6_frag));
					if (m == NULL) {
						IP6STAT_INC(ip6s_exthdrtoolong);
						*mp = m;
						return (-1);
					}
				}
				fh = (struct ip6_frag *)(mtod(m, caddr_t) +
				    eoff);
				/*
				 * Data after a fragment header is meaningless
				 * unless it is the first fragment, but
				 * we'll go to the notify label for path MTU
				 * discovery.
				 */
				if (fh->ip6f_offlg & IP6F_OFF_MASK)
					goto notify;

				eoff += sizeof(struct ip6_frag);
				nxt = fh->ip6f_nxt;
				break;
			default:
				/*
				 * This case includes ESP and the No Next
				 * Header.  In such cases going to the notify
				 * label does not have any meaning
				 * (i.e. ctlfunc will be NULL), but we go
				 * anyway since we might have to update
				 * path MTU information.
				 */
				goto notify;
			}
		}
	  notify:
		/*
		 * Re-fetch the header pointers: the m_pullup() calls above
		 * may have replaced the mbuf and invalidated icmp6/eip6.
		 */
		icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off);

		/*
		 * retrieve parameters from the inner IPv6 header, and convert
		 * them into sockaddr structures.
		 * XXX: there is no guarantee that the source or destination
		 * addresses of the inner packet are in the same scope as
		 * the addresses of the icmp packet.  But there is no other
		 * way to determine the zone.
		 */
		eip6 = (struct ip6_hdr *)(icmp6 + 1);

		bzero(&icmp6dst, sizeof(icmp6dst));
		icmp6dst.sin6_len = sizeof(struct sockaddr_in6);
		icmp6dst.sin6_family = AF_INET6;
		/* Prefer the routing-header final hop over the inner dst. */
		if (finaldst == NULL)
			icmp6dst.sin6_addr = eip6->ip6_dst;
		else
			icmp6dst.sin6_addr = *finaldst;
		if (in6_setscope(&icmp6dst.sin6_addr, m->m_pkthdr.rcvif, NULL))
			goto freeit;
		bzero(&icmp6src, sizeof(icmp6src));
		icmp6src.sin6_len = sizeof(struct sockaddr_in6);
		icmp6src.sin6_family = AF_INET6;
		icmp6src.sin6_addr = eip6->ip6_src;
		if (in6_setscope(&icmp6src.sin6_addr, m->m_pkthdr.rcvif, NULL))
			goto freeit;
		icmp6src.sin6_flowinfo =
		    (eip6->ip6_flow & IPV6_FLOWLABEL_MASK);

		if (finaldst == NULL)
			finaldst = &eip6->ip6_dst;
		/* Fill the control parameter block handed to pr_ctlinput. */
		ip6cp.ip6c_m = m;
		ip6cp.ip6c_icmp6 = icmp6;
		ip6cp.ip6c_ip6 = (struct ip6_hdr *)(icmp6 + 1);
		ip6cp.ip6c_off = eoff;
		ip6cp.ip6c_finaldst = finaldst;
		ip6cp.ip6c_src = &icmp6src;
		ip6cp.ip6c_nxt = nxt;

		if (icmp6type == ICMP6_PACKET_TOO_BIG) {
			/* notifymtu is stack-local; only valid for this call. */
			notifymtu = ntohl(icmp6->icmp6_mtu);
			ip6cp.ip6c_cmdarg = (void *)&notifymtu;
			icmp6_mtudisc_update(&ip6cp, 1);	/*XXX*/
		}

		ctlfunc = (void (*)(int, struct sockaddr *, void *))
		    (inet6sw[ip6_protox[nxt]].pr_ctlinput);
		if (ctlfunc) {
			(void) (*ctlfunc)(code, (struct sockaddr *)&icmp6dst,
			    &ip6cp);
		}
	}
	*mp = m;
	return (0);

  freeit:
	m_freem(m);
	*mp = NULL;
	return (-1);
}
      
      void
      icmp6_mtudisc_update(struct ip6ctlparam *ip6cp, int validated)
   14 {
              struct in6_addr *dst = ip6cp->ip6c_finaldst;
              struct icmp6_hdr *icmp6 = ip6cp->ip6c_icmp6;
              struct mbuf *m = ip6cp->ip6c_m;        /* will be necessary for scope issue */
              u_int mtu = ntohl(icmp6->icmp6_mtu);
              struct in_conninfo inc;
      
      #if 0
              /*
               * RFC2460 section 5, last paragraph.
               * even though minimum link MTU for IPv6 is IPV6_MMTU,
               * we may see ICMPv6 too big with mtu < IPV6_MMTU
               * due to packet translator in the middle.
               * see ip6_output() and ip6_getpmtu() "alwaysfrag" case for
               * special handling.
               */
              if (mtu < IPV6_MMTU)
                      return;
      #endif
      
              /*
               * we reject ICMPv6 too big with abnormally small value.
               * XXX what is the good definition of "abnormally small"?
               */
    7         if (mtu < sizeof(struct ip6_hdr) + sizeof(struct ip6_frag) + 8)
                      return;
      
              if (!validated)
                      return;
      
              /*
               * In case the suggested mtu is less than IPV6_MMTU, we
               * only need to remember that it was for above mentioned
               * "alwaysfrag" case.
               * Try to be as close to the spec as possible.
               */
              if (mtu < IPV6_MMTU)
                      mtu = IPV6_MMTU - 8;
      
              bzero(&inc, sizeof(inc));
              inc.inc_fibnum = M_GETFIB(m);
              inc.inc_flags |= INC_ISIPV6;
              inc.inc6_faddr = *dst;
              if (in6_setscope(&inc.inc6_faddr, m->m_pkthdr.rcvif, NULL))
                      return;
      
    5         if (mtu < tcp_maxmtu6(&inc, NULL)) {
                      tcp_hc_updatemtu(&inc, mtu);
                      ICMP6STAT_INC(icp6s_pmtuchg);
              }
      }
      
      /*
       * Process a Node Information Query packet, based on
       * draft-ietf-ipngwg-icmp-name-lookups-07.
       *
       * Spec incompatibilities:
       * - IPv6 Subject address handling
       * - IPv4 Subject address handling support missing
       * - Proxy reply (answer even if it's not for me)
       * - joins NI group address at in6_ifattach() time only, does not cope
       *   with hostname changes by sethostname(3)
       */
      static struct mbuf *
      ni6_input(struct mbuf *m, int off, struct prison *pr)
      {
              struct icmp6_nodeinfo *ni6, *nni6;
              struct mbuf *n = NULL;
              u_int16_t qtype;
              int subjlen;
              int replylen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo);
              struct ni_reply_fqdn *fqdn;
              int addrs;                /* for NI_QTYPE_NODEADDR */
              struct ifnet *ifp = NULL; /* for NI_QTYPE_NODEADDR */
              struct in6_addr in6_subj; /* subject address */
              struct ip6_hdr *ip6;
              int oldfqdn = 0;        /* if 1, return pascal string (03 draft) */
              char *subj = NULL;
              struct in6_ifaddr *ia6 = NULL;
      
              ip6 = mtod(m, struct ip6_hdr *);
              ni6 = (struct icmp6_nodeinfo *)(mtod(m, caddr_t) + off);
      
              /*
               * Validate IPv6 source address.
               * The default configuration MUST be to refuse answering queries from
               * global-scope addresses according to RFC4602.
               * Notes:
               *  - it's not very clear what "refuse" means; this implementation
               *    simply drops it.
               *  - it's not very easy to identify global-scope (unicast) addresses
               *    since there are many prefixes for them.  It should be safer
               *    and in practice sufficient to check "all" but loopback and
               *    link-local (note that site-local unicast was deprecated and
               *    ULA is defined as global scope-wise)
               */
   17         if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_GLOBALOK) == 0 &&
    3             !IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) &&
    1             !IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src))
                      goto bad;
      
              /*
               * Validate IPv6 destination address.
               *
               * The Responder must discard the Query without further processing
               * unless it is one of the Responder's unicast or anycast addresses, or
               * a link-local scope multicast address which the Responder has joined.
               * [RFC4602, Section 5.]
               */
              if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
   10                 if (!IN6_IS_ADDR_MC_LINKLOCAL(&ip6->ip6_dst))
                              goto bad;
                      /* else it's a link-local multicast, fine */
              } else {                /* unicast or anycast */
                      ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */);
                      if (ia6 == NULL)
                              goto bad; /* XXX impossible */
      
    6                 if ((ia6->ia6_flags & IN6_IFF_TEMPORARY) &&
                          !(V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK)) {
                              ifa_free(&ia6->ia_ifa);
                              nd6log((LOG_DEBUG, "ni6_input: ignore node info to "
                                      "a temporary address in %s:%d",
                                     __FILE__, __LINE__));
                              goto bad;
                      }
                      ifa_free(&ia6->ia_ifa);
              }
      
              /* validate query Subject field. */
              qtype = ntohs(ni6->ni_qtype);
              subjlen = m->m_pkthdr.len - off - sizeof(struct icmp6_nodeinfo);
   16         switch (qtype) {
              case NI_QTYPE_NOOP:
              case NI_QTYPE_SUPTYPES:
                      /* 07 draft */
    7                 if (ni6->ni_code == ICMP6_NI_SUBJ_FQDN && subjlen == 0)
                              break;
                      /* FALLTHROUGH */
              case NI_QTYPE_FQDN:
              case NI_QTYPE_NODEADDR:
              case NI_QTYPE_IPV4ADDR:
    3                 switch (ni6->ni_code) {
                      case ICMP6_NI_SUBJ_IPV6:
      #if ICMP6_NI_SUBJ_IPV6 != 0
                      case 0:
      #endif
                              /*
                               * backward compatibility - try to accept 03 draft
                               * format, where no Subject is present.
                               */
                              if (qtype == NI_QTYPE_FQDN && ni6->ni_code == 0 &&
                                  subjlen == 0) {
                                      oldfqdn++;
                                      break;
                              }
      #if ICMP6_NI_SUBJ_IPV6 != 0
                              if (ni6->ni_code != ICMP6_NI_SUBJ_IPV6)
                                      goto bad;
      #endif
      
    6                         if (subjlen != sizeof(struct in6_addr))
                                      goto bad;
      
                              /*
                               * Validate Subject address.
                               *
                               * Not sure what exactly "address belongs to the node"
                               * means in the spec, is it just unicast, or what?
                               *
                               * At this moment we consider Subject address as
                               * "belong to the node" if the Subject address equals
                               * to the IPv6 destination address; validation for
                               * IPv6 destination address should have done enough
                               * check for us.
                               *
                               * We do not do proxy at this moment.
                               */
                              m_copydata(m, off + sizeof(struct icmp6_nodeinfo),
                                  subjlen, (caddr_t)&in6_subj);
                              if (in6_setscope(&in6_subj, m->m_pkthdr.rcvif, NULL))
                                      goto bad;
      
                              subj = (char *)&in6_subj;
                              if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &in6_subj))
                                      break;
      
                              /*
                               * XXX if we are to allow other cases, we should really
                               * be careful about scope here.
                               * basically, we should disallow queries toward IPv6
                               * destination X with subject Y,
                               * if scope(X) > scope(Y).
                               * if we allow scope(X) > scope(Y), it will result in
                               * information leakage across scope boundary.
                               */
                              goto bad;
      
                      case ICMP6_NI_SUBJ_FQDN:
                              /*
                               * Validate Subject name with gethostname(3).
                               *
                               * The behavior may need some debate, since:
                               * - we are not sure if the node has FQDN as
                               *   hostname (returned by gethostname(3)).
                               * - the code does wildcard match for truncated names.
                               *   however, we are not sure if we want to perform
                               *   wildcard match, if gethostname(3) side has
                               *   truncated hostname.
                               */
                              mtx_lock(&pr->pr_mtx);
                              n = ni6_nametodns(pr->pr_hostname,
                                  strlen(pr->pr_hostname), 0);
                              mtx_unlock(&pr->pr_mtx);
                              if (!n || n->m_next || n->m_len == 0)
                                      goto bad;
    5                         if (m->m_len < off + sizeof(struct icmp6_nodeinfo) +
                                  subjlen) {
                                      m = m_pullup(m, off +
                                          sizeof(struct icmp6_nodeinfo) + subjlen);
                                      if (m == NULL) {
                                              IP6STAT_INC(ip6s_exthdrtoolong);
                                              goto bad;
                                      }
                              }
                              /* ip6 possibly invalid but not used after. */
                              ni6 = (struct icmp6_nodeinfo *)(mtod(m, caddr_t) + off);
                              subj = (char *)(mtod(m, caddr_t) + off +
                                  sizeof(struct icmp6_nodeinfo));
                              if (!ni6_dnsmatch(subj, subjlen, mtod(n, const char *),
                                  n->m_len)) {
                                      goto bad;
                              }
                              m_freem(n);
                              n = NULL;
                              break;
      
                      case ICMP6_NI_SUBJ_IPV4:        /* XXX: to be implemented? */
                      default:
                              goto bad;
                      }
                      break;
              }
      
              /* refuse based on configuration.  XXX ICMP6_NI_REFUSED? */
    2         switch (qtype) {
              case NI_QTYPE_FQDN:
                      if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_FQDNOK) == 0)
                              goto bad;
                      break;
              case NI_QTYPE_NODEADDR:
              case NI_QTYPE_IPV4ADDR:
                      if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_NODEADDROK) == 0)
                              goto bad;
                      break;
              }
      
              /* guess reply length */
              switch (qtype) {
              case NI_QTYPE_NOOP:
                      break;                /* no reply data */
              case NI_QTYPE_SUPTYPES:
                      replylen += sizeof(u_int32_t);
                      break;
              case NI_QTYPE_FQDN:
                      /* XXX will append an mbuf */
                      replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen);
                      break;
              case NI_QTYPE_NODEADDR:
                      addrs = ni6_addrs(ni6, m, &ifp, (struct in6_addr *)subj);
                      if ((replylen += addrs * (sizeof(struct in6_addr) +
                          sizeof(u_int32_t))) > MCLBYTES)
                              replylen = MCLBYTES; /* XXX: will truncate pkt later */
                      break;
              case NI_QTYPE_IPV4ADDR:
                      /* unsupported - should respond with unknown Qtype? */
                      break;
              default:
                      /*
                       * XXX: We must return a reply with the ICMP6 code
                       * `unknown Qtype' in this case.  However we regard the case
                       * as an FQDN query for backward compatibility.
                       * Older versions set a random value to this field,
                       * so it rarely varies in the defined qtypes.
                       * But the mechanism is not reliable...
                       * maybe we should obsolete older versions.
                       */
                      qtype = NI_QTYPE_FQDN;
                      /* XXX will append an mbuf */
                      replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen);
                      oldfqdn++;
                      break;
              }
      
              /* Allocate an mbuf to reply. */
              if (replylen > MCLBYTES) {
                      /*
                       * XXX: should we try to allocate more? But MCLBYTES
                       * is probably much larger than IPV6_MMTU...
                       */
                      goto bad;
              }
              if (replylen > MHLEN)
                      n = m_getcl(M_NOWAIT, m->m_type, M_PKTHDR);
              else
                      n = m_gethdr(M_NOWAIT, m->m_type);
              if (n == NULL) {
                      m_freem(m);
                      return (NULL);
              }
              m_move_pkthdr(n, m); /* just for recvif and FIB */
              n->m_pkthdr.len = n->m_len = replylen;
      
              /* copy mbuf header and IPv6 + Node Information base headers */
              bcopy(mtod(m, caddr_t), mtod(n, caddr_t), sizeof(struct ip6_hdr));
              nni6 = (struct icmp6_nodeinfo *)(mtod(n, struct ip6_hdr *) + 1);
              bcopy((caddr_t)ni6, (caddr_t)nni6, sizeof(struct icmp6_nodeinfo));
      
              /* qtype dependent procedure */
              switch (qtype) {
              case NI_QTYPE_NOOP:
                      nni6->ni_code = ICMP6_NI_SUCCESS;
                      nni6->ni_flags = 0;
                      break;
              case NI_QTYPE_SUPTYPES:
              {
                      u_int32_t v;
                      nni6->ni_code = ICMP6_NI_SUCCESS;
                      nni6->ni_flags = htons(0x0000);        /* raw bitmap */
                      /* supports NOOP, SUPTYPES, FQDN, and NODEADDR */
                      v = (u_int32_t)htonl(0x0000000f);
                      bcopy(&v, nni6 + 1, sizeof(u_int32_t));
                      break;
              }
              case NI_QTYPE_FQDN:
                      nni6->ni_code = ICMP6_NI_SUCCESS;
                      fqdn = (struct ni_reply_fqdn *)(mtod(n, caddr_t) +
                          sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo));
                      nni6->ni_flags = 0; /* XXX: meaningless TTL */
                      fqdn->ni_fqdn_ttl = 0;        /* ditto. */
                      /*
                       * XXX do we really have FQDN in hostname?
                       */
                      mtx_lock(&pr->pr_mtx);
                      n->m_next = ni6_nametodns(pr->pr_hostname,
                          strlen(pr->pr_hostname), oldfqdn);
                      mtx_unlock(&pr->pr_mtx);
                      if (n->m_next == NULL)
                              goto bad;
                      /* XXX we assume that n->m_next is not a chain */
                      if (n->m_next->m_next != NULL)
                              goto bad;
    2                 n->m_pkthdr.len += n->m_next->m_len;
                      break;
              case NI_QTYPE_NODEADDR:
              {
                      int lenlim, copied;
      
                      nni6->ni_code = ICMP6_NI_SUCCESS;
                      n->m_pkthdr.len = n->m_len =
                          sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo);
                      lenlim = M_TRAILINGSPACE(n);
                      copied = ni6_store_addrs(ni6, nni6, ifp, lenlim);
                      /* XXX: reset mbuf length */
                      n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) +
                          sizeof(struct icmp6_nodeinfo) + copied;
                      break;
              }
              default:
                      break;                /* XXX impossible! */
              }
      
              nni6->ni_type = ICMP6_NI_REPLY;
              m_freem(m);
              return (n);
      
        bad:
              m_freem(m);
              if (n)
                      m_freem(n);
              return (NULL);
      }
      
/*
 * make an mbuf with a DNS-encoded string.  no compression support.
 *
 * XXX names with fewer than 2 dots (like "foo" or "foo.section") will be
 * treated as truncated names (two \0 at the end).  this is a wild guess.
 *
 * old - return pascal string if non-zero
 */
static struct mbuf *
ni6_nametodns(const char *name, int namelen, int old)
{
	struct mbuf *m;
	char *cp, *ep;
	const char *p, *q;
	int i, len, nterm;

	/*
	 * "old" (pre-RFC) replies use a pascal string: one length byte
	 * followed by the raw name.  New-style replies are DNS-encoded,
	 * so reserve a whole cluster and trim afterwards.
	 */
	if (old)
		len = namelen + 1;
	else
		len = MCLBYTES;

	/* Because MAXHOSTNAMELEN is usually 256, we use cluster mbuf. */
	if (len > MLEN)
		m = m_getcl(M_NOWAIT, MT_DATA, 0);
	else
		m = m_get(M_NOWAIT, MT_DATA);
	if (m == NULL)
		goto fail;

	if (old) {
		/* pascal string: length byte + name, no termination */
		m->m_len = len;
		*mtod(m, char *) = namelen;
		bcopy(name, mtod(m, char *) + 1, namelen);
		return m;
	} else {
		m->m_len = 0;
		/* [cp, ep) is the writable window in this mbuf */
		cp = mtod(m, char *);
		ep = mtod(m, char *) + M_TRAILINGSPACE(m);

		/* if not certain about my name, return empty buffer */
		if (namelen == 0)
			return m;

		/*
		 * guess if it looks like shortened hostname, or FQDN.
		 * shortened hostname needs two trailing "\0".
		 */
		i = 0;
		for (p = name; p < name + namelen; p++) {
			if (*p && *p == '.')
				i++;
		}
		if (i < 2)
			nterm = 2;
		else
			nterm = 1;

		/* encode each dot-separated component as one DNS label */
		p = name;
		while (cp < ep && p < name + namelen) {
			i = 0;
			/* measure the current label (up to '.' or end) */
			for (q = p; q < name + namelen && *q && *q != '.'; q++)
				i++;
			/* result does not fit into mbuf */
			if (cp + i + 1 >= ep)
				goto fail;
			/*
			 * DNS label length restriction, RFC1035 page 8.
			 * "i == 0" case is included here to avoid returning
			 * 0-length label on "foo..bar".
			 */
			if (i <= 0 || i >= 64)
				goto fail;
			*cp++ = i;
			bcopy(p, cp, i);
			cp += i;
			p = q;
			/* skip the separating dot, if any */
			if (p < name + namelen && *p == '.')
				p++;
		}
		/* termination */
		if (cp + nterm >= ep)
			goto fail;
		while (nterm-- > 0)
			*cp++ = '\0';
		m->m_len = cp - mtod(m, char *);
		return m;
	}

	/* both branches above return; kept as a guard against edits */
	panic("should not reach here");
	/* NOTREACHED */

 fail:
	if (m)
		m_freem(m);
	return NULL;
}
      
/*
 * check if two DNS-encoded strings match.  takes care of the truncated
 * form (with \0\0 at the end).  no compression support.
 * XXX upper/lowercase match (see RFC2065)
 */
      static int
      ni6_dnsmatch(const char *a, int alen, const char *b, int blen)
      {
              const char *a0, *b0;
              int l;
      
              /* simplest case - need validation? */
    5         if (alen == blen && bcmp(a, b, alen) == 0)
                      return 1;
      
              a0 = a;
              b0 = b;
      
              /* termination is mandatory */
              if (alen < 2 || blen < 2)
                      return 0;
    1         if (a0[alen - 1] != '\0' || b0[blen - 1] != '\0')
                      return 0;
              alen--;
              blen--;
      
              while (a - a0 < alen && b - b0 < blen) {
                      if (a - a0 + 1 > alen || b - b0 + 1 > blen)
                              return 0;
      
    2                 if ((signed char)a[0] < 0 || (signed char)b[0] < 0)
                              return 0;
                      /* we don't support compression yet */
                      if (a[0] >= 64 || b[0] >= 64)
                              return 0;
      
                      /* truncated case */
                      if (a[0] == 0 && a - a0 == alen - 1)
                              return 1;
                      if (b[0] == 0 && b - b0 == blen - 1)
                              return 1;
    2                 if (a[0] == 0 || b[0] == 0)
                              return 0;
      
                      if (a[0] != b[0])
                              return 0;
                      l = a[0];
                      if (a - a0 + 1 + l > alen || b - b0 + 1 + l > blen)
                              return 0;
                      if (bcmp(a + 1, b + 1, l) != 0)
                              return 0;
      
                      a += 1 + l;
                      b += 1 + l;
              }
      
              if (a - a0 == alen && b - b0 == blen)
                      return 1;
              else
                      return 0;
      }
      
      /*
       * calculate the number of addresses to be returned in the node info reply.
       */
      static int
      ni6_addrs(struct icmp6_nodeinfo *ni6, struct mbuf *m, struct ifnet **ifpp,
          struct in6_addr *subj)
      {
              struct ifnet *ifp;
              struct in6_ifaddr *ifa6;
              struct ifaddr *ifa;
              int addrs = 0, addrsofif, iffound = 0;
              int niflags = ni6->ni_flags;
      
              NET_EPOCH_ASSERT();
      
              if ((niflags & NI_NODEADDR_FLAG_ALL) == 0) {
                      switch (ni6->ni_code) {
                      case ICMP6_NI_SUBJ_IPV6:
                              if (subj == NULL) /* must be impossible... */
                                      return (0);
                              break;
                      default:
                              /*
                               * XXX: we only support IPv6 subject address for
                               * this Qtype.
                               */
                              return (0);
                      }
              }
      
              CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
                      addrsofif = 0;
                      CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
                              if (ifa->ifa_addr->sa_family != AF_INET6)
                                      continue;
                              ifa6 = (struct in6_ifaddr *)ifa;
      
                              if ((niflags & NI_NODEADDR_FLAG_ALL) == 0 &&
                                  IN6_ARE_ADDR_EQUAL(subj, &ifa6->ia_addr.sin6_addr))
                                      iffound = 1;
      
                              /*
                               * IPv4-mapped addresses can only be returned by a
                               * Node Information proxy, since they represent
                               * addresses of IPv4-only nodes, which perforce do
                               * not implement this protocol.
                               * [icmp-name-lookups-07, Section 5.4]
                               * So we don't support NI_NODEADDR_FLAG_COMPAT in
                               * this function at this moment.
                               */
      
                              /* What do we have to do about ::1? */
                              switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) {
                              case IPV6_ADDR_SCOPE_LINKLOCAL:
                                      if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0)
                                              continue;
                                      break;
                              case IPV6_ADDR_SCOPE_SITELOCAL:
                                      if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0)
                                              continue;
                                      break;
                              case IPV6_ADDR_SCOPE_GLOBAL:
                                      if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0)
                                              continue;
                                      break;
                              default:
                                      continue;
                              }
      
                              /*
                               * check if anycast is okay.
                               * XXX: just experimental.  not in the spec.
                               */
                              if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 &&
                                  (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0)
                                      continue; /* we need only unicast addresses */
                              if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
                                  (V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK) == 0) {
                                      continue;
                              }
                              addrsofif++; /* count the address */
                      }
                      if (iffound) {
                              *ifpp = ifp;
                              return (addrsofif);
                      }
      
                      addrs += addrsofif;
              }
      
              return (addrs);
      }
      
static int
ni6_store_addrs(struct icmp6_nodeinfo *ni6, struct icmp6_nodeinfo *nni6,
    struct ifnet *ifp0, int resid)
{
	struct ifnet *ifp;
	struct in6_ifaddr *ifa6;
	struct ifaddr *ifa;
	struct ifnet *ifp_dep = NULL;
	int copied = 0, allow_deprecated = 0;
	u_char *cp = (u_char *)(nni6 + 1);	/* write point in the reply */
	int niflags = ni6->ni_flags;
	u_int32_t ltime;

	NET_EPOCH_ASSERT();

	if (ifp0 == NULL && !(niflags & NI_NODEADDR_FLAG_ALL))
		return (0);	/* needless to copy */

	ifp = ifp0 ? ifp0 : CK_STAILQ_FIRST(&V_ifnet);
  again:

	/*
	 * Two passes: first copy preferred addresses only; then, if any
	 * deprecated addresses were skipped, restart from the first such
	 * interface and copy the deprecated ones.
	 */
	for (; ifp; ifp = CK_STAILQ_NEXT(ifp, if_link)) {
		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
			if (ifa->ifa_addr->sa_family != AF_INET6)
				continue;
			ifa6 = (struct in6_ifaddr *)ifa;

			if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) != 0 &&
			    allow_deprecated == 0) {
				/*
				 * preferred addresses should be put before
				 * deprecated addresses.
				 */

				/* record the interface for later search */
				if (ifp_dep == NULL)
					ifp_dep = ifp;

				continue;
			} else if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) == 0 &&
			    allow_deprecated != 0)
				continue; /* we now collect deprecated addrs */

			/* What do we have to do about ::1? */
			switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) {
			case IPV6_ADDR_SCOPE_LINKLOCAL:
				if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0)
					continue;
				break;
			case IPV6_ADDR_SCOPE_SITELOCAL:
				if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0)
					continue;
				break;
			case IPV6_ADDR_SCOPE_GLOBAL:
				if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0)
					continue;
				break;
			default:
				continue;
			}

			/*
			 * check if anycast is okay.
			 * XXX: just experimental.  not in the spec.
			 */
			if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 &&
			    (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0)
				continue;
			if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
			    (V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK) == 0) {
				continue;
			}

			/* now we can copy the address */
			if (resid < sizeof(struct in6_addr) +
			    sizeof(u_int32_t)) {
				/*
				 * Not enough room for another TTL+address
				 * pair; set the truncate flag and stop.
				 */
				nni6->ni_flags |= NI_NODEADDR_FLAG_TRUNCATE;
				return (copied);
			}

			/*
			 * Set the TTL of the address.
			 * The TTL value should be one of the following
			 * according to the specification:
			 *
			 * 1. The remaining lifetime of a DHCP lease on the
			 *    address, or
			 * 2. The remaining Valid Lifetime of a prefix from
			 *    which the address was derived through Stateless
			 *    Autoconfiguration.
			 *
			 * Note that we currently do not support stateful
			 * address configuration by DHCPv6, so the former
			 * case can't happen.
			 */
			if (ifa6->ia6_lifetime.ia6t_expire == 0)
				/* all-ones; byte order is irrelevant */
				ltime = ND6_INFINITE_LIFETIME;
			else {
				if (ifa6->ia6_lifetime.ia6t_expire >
				    time_uptime)
					ltime = htonl(ifa6->ia6_lifetime.ia6t_expire - time_uptime);
				else
					ltime = 0;
			}

			bcopy(&ltime, cp, sizeof(u_int32_t));
			cp += sizeof(u_int32_t);

			/* copy the address itself */
			bcopy(&ifa6->ia_addr.sin6_addr, cp,
			    sizeof(struct in6_addr));
			/* embedded zone IDs must not leak on the wire */
			in6_clearscope((struct in6_addr *)cp); /* XXX */
			cp += sizeof(struct in6_addr);

			resid -= (sizeof(struct in6_addr) + sizeof(u_int32_t));
			copied += (sizeof(struct in6_addr) + sizeof(u_int32_t));
		}
		if (ifp0)	/* we need search only on the specified IF */
			break;
	}

	/* second pass: pick up the deprecated addresses skipped above */
	if (allow_deprecated == 0 && ifp_dep != NULL) {
		ifp = ifp_dep;
		allow_deprecated = 1;

		goto again;
	}

	return (copied);
}
      
      /*
       * XXX almost dup'ed code with rip6_input.
       */
static int
icmp6_rip6_input(struct mbuf **mp, int off)
{
	struct mbuf *m = *mp;
	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
	struct inpcb *inp;
	struct inpcb *last = NULL;	/* previous matching pcb, if any */
	struct sockaddr_in6 fromsa;
	struct icmp6_hdr *icmp6;
	struct mbuf *opts = NULL;

	NET_EPOCH_ASSERT();

	/* This is assumed to be safe; icmp6_input() does a pullup. */
	icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off);

	/*
	 * Build the source address to report to the socket.
	 * XXX: the address may have embedded scope zone ID, which should be
	 * hidden from applications.
	 */
	bzero(&fromsa, sizeof(fromsa));
	fromsa.sin6_family = AF_INET6;
	fromsa.sin6_len = sizeof(struct sockaddr_in6);
	fromsa.sin6_addr = ip6->ip6_src;
	if (sa6_recoverscope(&fromsa)) {
		m_freem(m);
		*mp = NULL;
		return (IPPROTO_DONE);
	}

	/*
	 * Deliver to every matching raw-ICMPv6 pcb: each previously
	 * matched pcb ("last") gets a copy; the final match receives
	 * the original mbuf after the loop.
	 */
	CK_LIST_FOREACH(inp, &V_ripcb, inp_list) {
		if ((inp->inp_vflag & INP_IPV6) == 0)
			continue;
		if (inp->inp_ip_p != IPPROTO_ICMPV6)
			continue;
		/* bound local/foreign addresses, if set, must match */
		if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) &&
		   !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &ip6->ip6_dst))
			continue;
		if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) &&
		   !IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &ip6->ip6_src))
			continue;
		INP_RLOCK(inp);
		if (__predict_false(inp->inp_flags2 & INP_FREED)) {
			INP_RUNLOCK(inp);
			continue;
		}
		/* honor the per-socket ICMP6 type filter */
		if (ICMP6_FILTER_WILLBLOCK(icmp6->icmp6_type,
		    inp->in6p_icmp6filt)) {
			INP_RUNLOCK(inp);
			continue;
		}
		if (last != NULL) {
			struct	mbuf *n = NULL;

			/*
			 * Recent network drivers tend to allocate a single
			 * mbuf cluster, rather than to make a couple of
			 * mbufs without clusters.  Also, since the IPv6 code
			 * path tries to avoid m_pullup(), it is highly
			 * probable that we still have an mbuf cluster here
			 * even though the necessary length can be stored in an
			 * mbuf's internal buffer.
			 * Meanwhile, the default size of the receive socket
			 * buffer for raw sockets is not so large.  This means
			 * the possibility of packet loss is relatively higher
			 * than before.  To avoid this scenario, we copy the
			 * received data to a separate mbuf that does not use
			 * a cluster, if possible.
			 * XXX: it is better to copy the data after stripping
			 * intermediate headers.
			 */
			if ((m->m_flags & M_EXT) && m->m_next == NULL &&
			    m->m_len <= MHLEN) {
				n = m_get(M_NOWAIT, m->m_type);
				if (n != NULL) {
					if (m_dup_pkthdr(n, m, M_NOWAIT)) {
						bcopy(m->m_data, n->m_data,
						      m->m_len);
						n->m_len = m->m_len;
					} else {
						/* header dup failed; fall
						 * back to m_copym below */
						m_free(n);
						n = NULL;
					}
				}
			}
			if (n != NULL ||
			    (n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) {
				if (last->inp_flags & INP_CONTROLOPTS)
					ip6_savecontrol(last, n, &opts);
				/* strip intermediate headers */
				m_adj(n, off);
				SOCKBUF_LOCK(&last->inp_socket->so_rcv);
				if (sbappendaddr_locked(
				    &last->inp_socket->so_rcv,
				    (struct sockaddr *)&fromsa, n, opts)
				    == 0) {
					/* should notify about lost packet */
					m_freem(n);
					if (opts) {
						m_freem(opts);
					}
					SOCKBUF_UNLOCK(
					    &last->inp_socket->so_rcv);
				} else
					sorwakeup_locked(last->inp_socket);
				/* sbappendaddr consumed opts on success */
				opts = NULL;
			}
			INP_RUNLOCK(last);
		}
		last = inp;
	}
	/* the last matching pcb receives the original mbuf */
	if (last != NULL) {
		if (last->inp_flags & INP_CONTROLOPTS)
			ip6_savecontrol(last, m, &opts);
		/* strip intermediate headers */
		m_adj(m, off);

		/* avoid using mbuf clusters if possible (see above) */
		if ((m->m_flags & M_EXT) && m->m_next == NULL &&
		    m->m_len <= MHLEN) {
			struct mbuf *n;

			n = m_get(M_NOWAIT, m->m_type);
			if (n != NULL) {
				if (m_dup_pkthdr(n, m, M_NOWAIT)) {
					bcopy(m->m_data, n->m_data, m->m_len);
					n->m_len = m->m_len;

					m_freem(m);
					m = n;
				} else {
					m_freem(n);
					n = NULL;
				}
			}
		}
		SOCKBUF_LOCK(&last->inp_socket->so_rcv);
		if (sbappendaddr_locked(&last->inp_socket->so_rcv,
		    (struct sockaddr *)&fromsa, m, opts) == 0) {
			m_freem(m);
			if (opts)
				m_freem(opts);
			SOCKBUF_UNLOCK(&last->inp_socket->so_rcv);
		} else
			sorwakeup_locked(last->inp_socket);
		INP_RUNLOCK(last);
	} else {
		/* nobody wanted it; undo the delivered counter */
		m_freem(m);
		IP6STAT_DEC(ip6s_delivered);
	}
	*mp = NULL;
	return (IPPROTO_DONE);
}
      
      /*
       * Reflect the ip6 packet back to the source.
       * OFF points to the icmp6 header, counted from the top of the mbuf.
       */
      static void
      icmp6_reflect(struct mbuf *m, size_t off)
   45 {
              struct in6_addr src6, *srcp;
              struct ip6_hdr *ip6;
              struct icmp6_hdr *icmp6;
              struct in6_ifaddr *ia = NULL;
              struct ifnet *outif = NULL;
              int plen;
              int type, code, hlim;
      
              /* too short to reflect */
              if (off < sizeof(struct ip6_hdr)) {
                      nd6log((LOG_DEBUG,
                          "sanity fail: off=%lx, sizeof(ip6)=%lx in %s:%d\n",
                          (u_long)off, (u_long)sizeof(struct ip6_hdr),
                          __FILE__, __LINE__));
                      goto bad;
              }
      
              /*
               * If there are extra headers between IPv6 and ICMPv6, strip
               * off that header first.
               */
      #ifdef DIAGNOSTIC
              if (sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) > MHLEN)
                      panic("assumption failed in icmp6_reflect");
      #endif
              if (off > sizeof(struct ip6_hdr)) {
                      size_t l;
                      struct ip6_hdr nip6;
      
                      l = off - sizeof(struct ip6_hdr);
                      m_copydata(m, 0, sizeof(nip6), (caddr_t)&nip6);
                      m_adj(m, l);
                      l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
    1                 if (m->m_len < l) {
                              if ((m = m_pullup(m, l)) == NULL)
                                      return;
                      }
                      bcopy((caddr_t)&nip6, mtod(m, caddr_t), sizeof(nip6));
              } else /* off == sizeof(struct ip6_hdr) */ {
                      size_t l;
                      l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
   44                 if (m->m_len < l) {
                              if ((m = m_pullup(m, l)) == NULL)
                                      return;
                      }
              }
              plen = m->m_pkthdr.len - sizeof(struct ip6_hdr);
              ip6 = mtod(m, struct ip6_hdr *);
              ip6->ip6_nxt = IPPROTO_ICMPV6;
              icmp6 = (struct icmp6_hdr *)(ip6 + 1);
              type = icmp6->icmp6_type; /* keep type for statistics */
              code = icmp6->icmp6_code; /* ditto. */
              hlim = 0;
              srcp = NULL;
      
              /*
               * If the incoming packet was addressed directly to us (i.e. unicast),
               * use dst as the src for the reply.
               * The IN6_IFF_NOTREADY case should be VERY rare, but is possible
               * (for example) when we encounter an error while forwarding procedure
               * destined to a duplicated address of ours.
               */
   11         if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
                      ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */);
    7                 if (ia != NULL && !(ia->ia6_flags &
                          (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY))) {
                              src6 = ia->ia_addr.sin6_addr;
                              srcp = &src6;
      
                              if (m->m_pkthdr.rcvif != NULL) {
                                      /* XXX: This may not be the outgoing interface */
   27                                 hlim = ND_IFINFO(m->m_pkthdr.rcvif)->chlim;
                              } else
                                      hlim = V_ip6_defhlim;
                      }
                      if (ia != NULL)
                              ifa_free(&ia->ia_ifa);
              }
      
              if (srcp == NULL) {
                      int error;
                      struct in6_addr dst6;
                      uint32_t scopeid;
      
                      /*
                       * This case matches to multicasts, our anycast, or unicasts
                       * that we do not own.  Select a source address based on the
                       * source address of the erroneous packet.
                       */
                      in6_splitscope(&ip6->ip6_src, &dst6, &scopeid);
                      error = in6_selectsrc_addr(M_GETFIB(m), &dst6,
                          scopeid, NULL, &src6, &hlim);
      
                      if (error) {
                              char ip6buf[INET6_ADDRSTRLEN];
   16                         nd6log((LOG_DEBUG,
                                  "icmp6_reflect: source can't be determined: "
                                  "dst=%s, error=%d\n",
                                  ip6_sprintf(ip6buf, &ip6->ip6_dst), error));
                              goto bad;
                      }
                      srcp = &src6;
              }
              /*
               * ip6_input() drops a packet if its src is multicast.
               * So, the src is never multicast.
               */
              ip6->ip6_dst = ip6->ip6_src;
              ip6->ip6_src = *srcp;
              ip6->ip6_flow = 0;
              ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
              ip6->ip6_vfc |= IPV6_VERSION;
              ip6->ip6_nxt = IPPROTO_ICMPV6;
              ip6->ip6_hlim = hlim;
      
              icmp6->icmp6_cksum = 0;
              icmp6->icmp6_cksum = in6_cksum(m, IPPROTO_ICMPV6,
                  sizeof(struct ip6_hdr), plen);
      
              /*
               * XXX option handling
               */
      
              m->m_flags &= ~(M_BCAST|M_MCAST);
              m->m_pkthdr.rcvif = NULL;
              ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL);
   11         if (outif)
    4                 icmp6_ifoutstat_inc(outif, type, code);
      
              return;
      
       bad:
              m_freem(m);
              return;
      }
      
      void
      icmp6_fasttimo(void)
      {
      
              mld_fasttimo();
      }
      
      void
      icmp6_slowtimo(void)
      {
      
              mld_slowtimo();
      }
      
      static const char *
      icmp6_redirect_diag(struct in6_addr *src6, struct in6_addr *dst6,
          struct in6_addr *tgt6)
      {
              static char buf[1024];
              char ip6bufs[INET6_ADDRSTRLEN];
              char ip6bufd[INET6_ADDRSTRLEN];
              char ip6buft[INET6_ADDRSTRLEN];
              snprintf(buf, sizeof(buf), "(src=%s dst=%s tgt=%s)",
                  ip6_sprintf(ip6bufs, src6), ip6_sprintf(ip6bufd, dst6),
                  ip6_sprintf(ip6buft, tgt6));
              return buf;
      }
      
/*
 * Input handler for an ICMPv6 Redirect message (RFC 2461/4861).
 * 'off' is the byte offset of the nd_redirect header within mbuf 'm'.
 * Validates the message, updates the neighbor cache for the redirect
 * target, installs a redirect route in every FIB, and notifies sockets
 * via pfctlinput().  Consumes (frees) the mbuf on all paths.
 */
void
icmp6_redirect_input(struct mbuf *m, int off)
{
	struct ifnet *ifp;
	struct ip6_hdr *ip6;
	struct nd_redirect *nd_rd;
	struct in6_addr src6, redtgt6, reddst6;
	union nd_opts ndopts;
	char ip6buf[INET6_ADDRSTRLEN];
	char *lladdr;
	int icmp6len, is_onlink, is_router, lladdrlen;

	M_ASSERTPKTHDR(m);
	KASSERT(m->m_pkthdr.rcvif != NULL, ("%s: no rcvif", __func__));

	/* XXX if we are router, we don't update route by icmp6 redirect */
	if (V_ip6_forwarding)
		goto freeit;
	/* Redirect acceptance can be disabled administratively. */
	if (!V_icmp6_rediraccept)
		goto freeit;

	/* RFC 6980: Nodes MUST silently ignore fragments */
	if(m->m_flags & M_FRAGMENTED)
		goto freeit;

	ip6 = mtod(m, struct ip6_hdr *);
	icmp6len = ntohs(ip6->ip6_plen);
	/* Make the whole redirect message contiguous before parsing it. */
	if (m->m_len < off + icmp6len) {
		m = m_pullup(m, off + icmp6len);
		if (m == NULL) {
			IP6STAT_INC(ip6s_exthdrtoolong);
			return;
		}
	}
	ip6 = mtod(m, struct ip6_hdr *);
	nd_rd = (struct nd_redirect *)((caddr_t)ip6 + off);

	ifp = m->m_pkthdr.rcvif;
	redtgt6 = nd_rd->nd_rd_target;
	reddst6 = nd_rd->nd_rd_dst;

	/* Embed the receiving interface's zone id in both addresses. */
	if (in6_setscope(&redtgt6, ifp, NULL) ||
	    in6_setscope(&reddst6, ifp, NULL)) {
		goto freeit;
	}

	/* validation */
	src6 = ip6->ip6_src;
	/* The IP source of a redirect must be a link-local address. */
	if (!IN6_IS_ADDR_LINKLOCAL(&src6)) {
		nd6log((LOG_ERR,
		    "ICMP6 redirect sent from %s rejected; "
		    "must be from linklocal\n",
		    ip6_sprintf(ip6buf, &src6)));
		goto bad;
	}
	/* Hop limit 255 proves the packet was not forwarded. */
	if (ip6->ip6_hlim != 255) {
		nd6log((LOG_ERR,
		    "ICMP6 redirect sent from %s rejected; "
		    "hlim=%d (must be 255)\n",
		    ip6_sprintf(ip6buf, &src6), ip6->ip6_hlim));
		goto bad;
	}
    {
	/* ip6->ip6_src must be equal to gw for icmp6->icmp6_reddst */
	struct nhop_object *nh;
	struct in6_addr kdst;
	uint32_t scopeid;

	in6_splitscope(&reddst6, &kdst, &scopeid);
	NET_EPOCH_ASSERT();
	/* Look up our current route to the redirected destination. */
	nh = fib6_lookup(ifp->if_fib, &kdst, scopeid, 0, 0);
	if (nh != NULL) {
		struct in6_addr nh_addr;
		/*
		 * NOTE(review): this assignment is dead -- nh_addr is
		 * overwritten below (gw6_sa) before any use.
		 */
		nh_addr = ifatoia6(nh->nh_ifa)->ia_addr.sin6_addr;
		if ((nh->nh_flags & NHF_GATEWAY) == 0) {
			nd6log((LOG_ERR,
			    "ICMP6 redirect rejected; no route "
			    "with inet6 gateway found for redirect dst: %s\n",
			    icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
			goto bad;
		}

		/*
		 * Embed scope zone id into next hop address.
		 */
		nh_addr = nh->gw6_sa.sin6_addr;

		/* The sender must be our current first hop for reddst6. */
		if (IN6_ARE_ADDR_EQUAL(&src6, &nh_addr) == 0) {
			nd6log((LOG_ERR,
			    "ICMP6 redirect rejected; "
			    "not equal to gw-for-src=%s (must be same): "
			    "%s\n",
			    ip6_sprintf(ip6buf, &nh_addr),
			    icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
			goto bad;
		}
	} else {
		nd6log((LOG_ERR,
		    "ICMP6 redirect rejected; "
		    "no route found for redirect dst: %s\n",
		    icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
		goto bad;
	}
    }
	if (IN6_IS_ADDR_MULTICAST(&reddst6)) {
		nd6log((LOG_ERR,
		    "ICMP6 redirect rejected; "
		    "redirect dst must be unicast: %s\n",
		    icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
		goto bad;
	}

	/*
	 * Classify: a link-local target means "use this router instead";
	 * target == destination means the destination is on-link.
	 */
	is_router = is_onlink = 0;
	if (IN6_IS_ADDR_LINKLOCAL(&redtgt6))
		is_router = 1;	/* router case */
	if (bcmp(&redtgt6, &reddst6, sizeof(redtgt6)) == 0)
		is_onlink = 1;	/* on-link destination case */
	if (!is_router && !is_onlink) {
		nd6log((LOG_ERR,
		    "ICMP6 redirect rejected; "
		    "neither router case nor onlink case: %s\n",
		    icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
		goto bad;
	}

	/* Parse the ND options that follow the fixed redirect header. */
	icmp6len -= sizeof(*nd_rd);
	nd6_option_init(nd_rd + 1, icmp6len, &ndopts);
	if (nd6_options(&ndopts) < 0) {
		nd6log((LOG_INFO, "%s: invalid ND option, rejected: %s\n",
		    __func__, icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
		/* nd6_options have incremented stats */
		goto freeit;
	}

	lladdr = NULL;
	lladdrlen = 0;
	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1);
		lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
	}

	/*
	 * The option length (units of 8 bytes, including the 2-byte option
	 * header) must match this interface's link-layer address length.
	 */
	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
		nd6log((LOG_INFO, "%s: lladdrlen mismatch for %s "
		    "(if %d, icmp6 packet %d): %s\n",
		    __func__, ip6_sprintf(ip6buf, &redtgt6),
		    ifp->if_addrlen, lladdrlen - 2,
		    icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
		goto bad;
	}

	/* Validation passed. */

	/* RFC 2461 8.3 */
	nd6_cache_lladdr(ifp, &redtgt6, lladdr, lladdrlen, ND_REDIRECT,
	    is_onlink ? ND_REDIRECT_ONLINK : ND_REDIRECT_ROUTER);

	/*
	 * Install a gateway route in the better-router case or an interface
	 * route in the on-link-destination case.
	 */
	{
		struct sockaddr_in6 sdst;
		struct sockaddr_in6 sgw;
		struct sockaddr_in6 ssrc;
		struct sockaddr *gw;
		int rt_flags;
		u_int fibnum;

		bzero(&sdst, sizeof(sdst));
		bzero(&ssrc, sizeof(ssrc));
		sdst.sin6_family = ssrc.sin6_family = AF_INET6;
		sdst.sin6_len = ssrc.sin6_len = sizeof(struct sockaddr_in6);
		bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
		bcopy(&src6, &ssrc.sin6_addr, sizeof(struct in6_addr));
		rt_flags = 0;
		if (is_router) {
			bzero(&sgw, sizeof(sgw));
			sgw.sin6_family = AF_INET6;
			sgw.sin6_len = sizeof(struct sockaddr_in6);
			bcopy(&redtgt6, &sgw.sin6_addr,
				sizeof(struct in6_addr));
			gw = (struct sockaddr *)&sgw;
			rt_flags |= RTF_GATEWAY;
		} else
			gw = ifp->if_addr->ifa_addr;
		/* Apply the (expiring) redirect route to every FIB. */
		for (fibnum = 0; fibnum < rt_numfibs; fibnum++)
			rib_add_redirect(fibnum, (struct sockaddr *)&sdst, gw,
			    (struct sockaddr *)&ssrc, ifp, rt_flags,
			    V_icmp6_redirtimeout);
	}
	/* finally update cached route in each socket via pfctlinput */
    {
	struct sockaddr_in6 sdst;

	bzero(&sdst, sizeof(sdst));
	sdst.sin6_family = AF_INET6;
	sdst.sin6_len = sizeof(struct sockaddr_in6);
	bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
	pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&sdst);
    }

 freeit:
	m_freem(m);
	return;

 bad:
	ICMP6STAT_INC(icp6s_badredirect);
	m_freem(m);
}
      
/*
 * Build and transmit an ICMPv6 Redirect telling the source of packet
 * 'm0' about a better first hop ('nh') for its destination.  Only sent
 * when acting as a router (V_ip6_forwarding).  Consumes m0 on all
 * paths; the redirect itself is built in a freshly allocated cluster.
 */
void
icmp6_redirect_output(struct mbuf *m0, struct nhop_object *nh)
{
	struct ifnet *ifp;	/* my outgoing interface */
	struct in6_addr *ifp_ll6;
	struct in6_addr *router_ll6;
	struct ip6_hdr *sip6;	/* m0 as struct ip6_hdr */
	struct mbuf *m = NULL;	/* newly allocated one */
	struct m_tag *mtag;
	struct ip6_hdr *ip6;	/* m as struct ip6_hdr */
	struct nd_redirect *nd_rd;
	struct llentry *ln = NULL;
	size_t maxlen;
	u_char *p;
	struct ifnet *outif = NULL;
	struct sockaddr_in6 src_sa;

	icmp6_errcount(ND_REDIRECT, 0);

	/* if we are not router, we don't send icmp6 redirect */
	if (!V_ip6_forwarding)
		goto fail;

	/* sanity check */
	if (!m0 || !nh || !(NH_IS_VALID(nh)) || !(ifp = nh->nh_ifp))
		goto fail;

	/*
	 * Address check:
	 *  the source address must identify a neighbor, and
	 *  the destination address must not be a multicast address
	 *  [RFC 2461, sec 8.2]
	 */
	sip6 = mtod(m0, struct ip6_hdr *);
	bzero(&src_sa, sizeof(src_sa));
	src_sa.sin6_family = AF_INET6;
	src_sa.sin6_len = sizeof(src_sa);
	src_sa.sin6_addr = sip6->ip6_src;
	if (nd6_is_addr_neighbor(&src_sa, ifp) == 0)
		goto fail;
	if (IN6_IS_ADDR_MULTICAST(&sip6->ip6_dst))
		goto fail;	/* what should we do here? */

	/* rate limit */
	if (icmp6_ratelimit(&sip6->ip6_src, ND_REDIRECT, 0))
		goto fail;

	/*
	 * Since we are going to append up to 1280 bytes (= IPV6_MMTU),
	 * we almost always ask for an mbuf cluster for simplicity.
	 * (MHLEN < IPV6_MMTU is almost always true)
	 */
#if IPV6_MMTU >= MCLBYTES
# error assumption failed about IPV6_MMTU and MCLBYTES
#endif
	m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
	if (m == NULL)
		goto fail;
	/* Inherit the FIB of the packet that triggered the redirect. */
	M_SETFIB(m, M_GETFIB(m0));
	maxlen = M_TRAILINGSPACE(m);
	maxlen = min(IPV6_MMTU, maxlen);
	/* just for safety */
	if (maxlen < sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) +
	    ((sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7)) {
		goto fail;
	}

	{
		/* get ip6 linklocal address for ifp(my outgoing interface). */
		struct in6_ifaddr *ia;
		if ((ia = in6ifa_ifpforlinklocal(ifp,
						 IN6_IFF_NOTREADY|
						 IN6_IFF_ANYCAST)) == NULL)
			goto fail;
		ifp_ll6 = &ia->ia_addr.sin6_addr;
		/* XXXRW: reference released prematurely. */
		ifa_free(&ia->ia_ifa);
	}

	/* get ip6 linklocal address for the router. */
	if (nh->nh_flags & NHF_GATEWAY) {
		struct sockaddr_in6 *sin6;
		sin6 = &nh->gw6_sa;
		router_ll6 = &sin6->sin6_addr;
		if (!IN6_IS_ADDR_LINKLOCAL(router_ll6))
			router_ll6 = (struct in6_addr *)NULL;
	} else
		router_ll6 = (struct in6_addr *)NULL;

	/* ip6 */
	ip6 = mtod(m, struct ip6_hdr *);
	ip6->ip6_flow = 0;
	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
	ip6->ip6_vfc |= IPV6_VERSION;
	/* ip6->ip6_plen will be set later */
	ip6->ip6_nxt = IPPROTO_ICMPV6;
	ip6->ip6_hlim = 255;
	/* ip6->ip6_src must be linklocal addr for my outgoing if. */
	bcopy(ifp_ll6, &ip6->ip6_src, sizeof(struct in6_addr));
	bcopy(&sip6->ip6_src, &ip6->ip6_dst, sizeof(struct in6_addr));

	/* ND Redirect */
	nd_rd = (struct nd_redirect *)(ip6 + 1);
	nd_rd->nd_rd_type = ND_REDIRECT;
	nd_rd->nd_rd_code = 0;
	nd_rd->nd_rd_reserved = 0;
	if (nh->nh_flags & NHF_GATEWAY) {
		/*
		 * nd_rd->nd_rd_target must be a link-local address in
		 * better router cases.
		 */
		if (!router_ll6)
			goto fail;
		bcopy(router_ll6, &nd_rd->nd_rd_target,
		    sizeof(nd_rd->nd_rd_target));
		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst,
		    sizeof(nd_rd->nd_rd_dst));
	} else {
		/* make sure redtgt == reddst */
		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_target,
		    sizeof(nd_rd->nd_rd_target));
		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst,
		    sizeof(nd_rd->nd_rd_dst));
	}

	/* 'p' tracks the append point for ND options. */
	p = (u_char *)(nd_rd + 1);

	if (!router_ll6)
		goto nolladdropt;

	{
		/* target lladdr option */
		int len;
		struct nd_opt_hdr *nd_opt;
		char *lladdr;

		ln = nd6_lookup(router_ll6, 0, ifp);
		if (ln == NULL)
			goto nolladdropt;

		len = sizeof(*nd_opt) + ifp->if_addrlen;
		len = (len + 7) & ~7;	/* round by 8 */
		/* safety check */
		if (len + (p - (u_char *)ip6) > maxlen)
			goto nolladdropt;

		if (ln->la_flags & LLE_VALID) {
			nd_opt = (struct nd_opt_hdr *)p;
			nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
			nd_opt->nd_opt_len = len >> 3;
			lladdr = (char *)(nd_opt + 1);
			bcopy(ln->ll_addr, lladdr, ifp->if_addrlen);
			p += len;
		}
	}
nolladdropt:
	/* The llentry was read-locked by nd6_lookup(); drop it now. */
	if (ln != NULL)
		LLE_RUNLOCK(ln);

	m->m_pkthdr.len = m->m_len = p - (u_char *)ip6;

	/* just to be safe */
#ifdef M_DECRYPTED	/*not openbsd*/
	if (m0->m_flags & M_DECRYPTED)
		goto noredhdropt;
#endif
	if (p - (u_char *)ip6 > maxlen)
		goto noredhdropt;

	{
		/* redirected header option */
		int len;
		struct nd_opt_rd_hdr *nd_opt_rh;

		/*
		 * compute the maximum size for icmp6 redirect header option.
		 * XXX room for auth header?
		 */
		len = maxlen - (p - (u_char *)ip6);
		len &= ~7;

		/* This is just for simplicity. */
		if (m0->m_pkthdr.len != m0->m_len) {
			if (m0->m_next) {
				m_freem(m0->m_next);
				m0->m_next = NULL;
			}
			m0->m_pkthdr.len = m0->m_len;
		}

		/*
		 * Redirected header option spec (RFC2461 4.6.3) talks nothing
		 * about padding/truncate rule for the original IP packet.
		 * From the discussion on IPv6imp in Feb 1999,
		 * the consensus was:
		 * - "attach as much as possible" is the goal
		 * - pad if not aligned (original size can be guessed by
		 *   original ip6 header)
		 * Following code adds the padding if it is simple enough,
		 * and truncates if not.
		 */
		if (m0->m_next || m0->m_pkthdr.len != m0->m_len)
			panic("assumption failed in %s:%d", __FILE__,
			    __LINE__);

		if (len - sizeof(*nd_opt_rh) < m0->m_pkthdr.len) {
			/* not enough room, truncate */
			m0->m_pkthdr.len = m0->m_len = len -
			    sizeof(*nd_opt_rh);
		} else {
			/* enough room, pad or truncate */
			size_t extra;

			extra = m0->m_pkthdr.len % 8;
			if (extra) {
				/* pad if easy enough, truncate if not */
				if (8 - extra <= M_TRAILINGSPACE(m0)) {
					/* pad */
					m0->m_len += (8 - extra);
					m0->m_pkthdr.len += (8 - extra);
				} else {
					/* truncate */
					m0->m_pkthdr.len -= extra;
					m0->m_len -= extra;
				}
			}
			len = m0->m_pkthdr.len + sizeof(*nd_opt_rh);
			m0->m_pkthdr.len = m0->m_len = len -
			    sizeof(*nd_opt_rh);
		}

		nd_opt_rh = (struct nd_opt_rd_hdr *)p;
		bzero(nd_opt_rh, sizeof(*nd_opt_rh));
		nd_opt_rh->nd_opt_rh_type = ND_OPT_REDIRECTED_HEADER;
		nd_opt_rh->nd_opt_rh_len = len >> 3;
		p += sizeof(*nd_opt_rh);
		m->m_pkthdr.len = m->m_len = p - (u_char *)ip6;

		/* connect m0 to m */
		m_tag_delete_chain(m0, NULL);
		m0->m_flags &= ~M_PKTHDR;
		m->m_next = m0;
		m->m_pkthdr.len = m->m_len + m0->m_len;
		/* m0 is now owned by m; prevent the free below/at fail. */
		m0 = NULL;
	}
noredhdropt:;
	if (m0) {
		m_freem(m0);
		m0 = NULL;
	}

	/* XXX: clear embedded link IDs in the inner header */
	in6_clearscope(&sip6->ip6_src);
	in6_clearscope(&sip6->ip6_dst);
	in6_clearscope(&nd_rd->nd_rd_target);
	in6_clearscope(&nd_rd->nd_rd_dst);

	ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr));

	/* Compute the ICMPv6 checksum over the assembled chain. */
	nd_rd->nd_rd_cksum = 0;
	nd_rd->nd_rd_cksum = in6_cksum(m, IPPROTO_ICMPV6,
	    sizeof(*ip6), ntohs(ip6->ip6_plen));

	/* Tag the packet for the SeND hook, if one is installed. */
	if (send_sendso_input_hook != NULL) {
		mtag = m_tag_get(PACKET_TAG_ND_OUTGOING, sizeof(unsigned short),
			M_NOWAIT);
		if (mtag == NULL)
			goto fail;
		*(unsigned short *)(mtag + 1) = nd_rd->nd_rd_type;
		m_tag_prepend(m, mtag);
	}

	/* send the packet to outside... */
	ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL);
	if (outif) {
		icmp6_ifstat_inc(outif, ifs6_out_msg);
		icmp6_ifstat_inc(outif, ifs6_out_redirect);
	}
	ICMP6STAT_INC(icp6s_outhist[ND_REDIRECT]);

	return;

fail:
	if (m)
		m_freem(m);
	if (m0)
		m_freem(m0);
}
      
      /*
       * ICMPv6 socket option processing.
       */
      int
      icmp6_ctloutput(struct socket *so, struct sockopt *sopt)
    4 {
              int error = 0;
              int optlen;
              struct inpcb *inp = sotoinpcb(so);
              int level, op, optname;
      
              if (sopt) {
                      level = sopt->sopt_level;
                      op = sopt->sopt_dir;
                      optname = sopt->sopt_name;
                      optlen = sopt->sopt_valsize;
              } else
                      level = op = optname = optlen = 0;
      
              if (level != IPPROTO_ICMPV6) {
                      return EINVAL;
              }
      
              switch (op) {
              case PRCO_SETOPT:
    1                 switch (optname) {
                      case ICMP6_FILTER:
                          {
                              struct icmp6_filter ic6f;
      
    1                         if (optlen != sizeof(ic6f)) {
                                      error = EMSGSIZE;
                                      break;
                              }
                              error = sooptcopyin(sopt, &ic6f, optlen, optlen);
                              if (error == 0) {
                                      INP_WLOCK(inp);
                                      *inp->in6p_icmp6filt = ic6f;
                                      INP_WUNLOCK(inp);
                              }
                              break;
                          }
      
                      default:
                              error = ENOPROTOOPT;
                              break;
                      }
                      break;
      
              case PRCO_GETOPT:
    1                 switch (optname) {
                      case ICMP6_FILTER:
                          {
    1                         struct icmp6_filter ic6f;
      
                              INP_RLOCK(inp);
                              ic6f = *inp->in6p_icmp6filt;
                              INP_RUNLOCK(inp);
                              error = sooptcopyout(sopt, &ic6f, sizeof(ic6f));
                              break;
                          }
      
                      default:
                              error = ENOPROTOOPT;
                              break;
                      }
                      break;
              }
      
              return (error);
      }
      
      /*
       * Perform rate limit check.
       * Returns 0 if it is okay to send the icmp6 packet.
       * Returns 1 if the router SHOULD NOT send this icmp6 packet due to rate
       * limitation.
       *
       * XXX per-destination/type check necessary?
       *
       * dst - not used at this moment
       * type - not used at this moment
       * code - not used at this moment
       */
      static int
      icmp6_ratelimit(const struct in6_addr *dst, const int type,
          const int code)
      {
              int ret;
      
              ret = 0;        /* okay to send */
      
              /* PPS limit */
              if (!ppsratecheck(&V_icmp6errppslim_last, &V_icmp6errpps_count,
                  V_icmp6errppslim)) {
                      /* The packet is subject to rate limit */
                      ret++;
              }
      
              return ret;
      }
      /*-
       * Implementation of the Common Access Method Transport (XPT) layer.
       *
       * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
       *
       * Copyright (c) 1997, 1998, 1999 Justin T. Gibbs.
       * Copyright (c) 1997, 1998, 1999 Kenneth D. Merry.
       * All rights reserved.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions, and the following disclaimer,
       *    without modification, immediately at the beginning of the file.
       * 2. The name of the author may not be used to endorse or promote products
       *    derived from this software without specific prior written permission.
       *
       * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
       * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
       * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
       * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
       * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
       * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
       * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
       * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
       * SUCH DAMAGE.
       */
      
      #include "opt_printf.h"
      
      #include <sys/cdefs.h>
      __FBSDID("$FreeBSD$");
      
      #include <sys/param.h>
      #include <sys/bio.h>
      #include <sys/bus.h>
      #include <sys/systm.h>
      #include <sys/types.h>
      #include <sys/malloc.h>
      #include <sys/kernel.h>
      #include <sys/time.h>
      #include <sys/conf.h>
      #include <sys/fcntl.h>
      #include <sys/proc.h>
      #include <sys/sbuf.h>
      #include <sys/smp.h>
      #include <sys/taskqueue.h>
      
      #include <sys/lock.h>
      #include <sys/mutex.h>
      #include <sys/sysctl.h>
      #include <sys/kthread.h>
      
      #include <cam/cam.h>
      #include <cam/cam_ccb.h>
      #include <cam/cam_iosched.h>
      #include <cam/cam_periph.h>
      #include <cam/cam_queue.h>
      #include <cam/cam_sim.h>
      #include <cam/cam_xpt.h>
      #include <cam/cam_xpt_sim.h>
      #include <cam/cam_xpt_periph.h>
      #include <cam/cam_xpt_internal.h>
      #include <cam/cam_debug.h>
      #include <cam/cam_compat.h>
      
      #include <cam/scsi/scsi_all.h>
      #include <cam/scsi/scsi_message.h>
      #include <cam/scsi/scsi_pass.h>
      
      #include <machine/stdarg.h>        /* for xpt_print below */
      
      #include "opt_cam.h"
      
/* Wild guess based on not wanting to grow the stack too much */
#define XPT_PRINT_MAXLEN	512
/*
 * Follow the configured kernel printf buffer size when one exists
 * (opt_printf.h); otherwise fall back to a modest default.
 */
#ifdef PRINTF_BUFR_SIZE
#define XPT_PRINT_LEN	PRINTF_BUFR_SIZE
#else
#define XPT_PRINT_LEN	128
#endif
_Static_assert(XPT_PRINT_LEN <= XPT_PRINT_MAXLEN, "XPT_PRINT_LEN is too large");

/*
 * This is the maximum number of high powered commands (e.g. start unit)
 * that can be outstanding at a particular time.
 */
#ifndef CAM_MAX_HIGHPOWER
#define CAM_MAX_HIGHPOWER  4
#endif

/* Datastructures internal to the xpt layer */
/* malloc(9) type tags for the XPT layer's allocations. */
MALLOC_DEFINE(M_CAMXPT, "CAM XPT", "CAM XPT buffers");
MALLOC_DEFINE(M_CAMDEV, "CAM DEV", "CAM devices");
MALLOC_DEFINE(M_CAMCCB, "CAM CCB", "CAM CCBs");
MALLOC_DEFINE(M_CAMPATH, "CAM path", "CAM paths");
      
/*
 * Global state for the XPT layer.  A single instance (xsoftc, below) is
 * shared by the whole subsystem.
 */
struct xpt_softc {
	/* Bumped as peripherals come and go; exported via sysctl below. */
	uint32_t		xpt_generation;

	/* number of high powered commands that can go through right now */
	struct mtx		xpt_highpower_lock;
	STAILQ_HEAD(highpowerlist, cam_ed)	highpowerq;
	int			num_highpower;

	/* queue for handling async rescan requests. */
	TAILQ_HEAD(, ccb_hdr) ccb_scanq;
	int buses_to_config;	/* NOTE(review): bus-config bookkeeping; exact protocol not visible here */
	int buses_config_done;
	int announce_nosbuf;	/* sysctl knob: don't use sbuf for announcements */

	/*
	 * Registered buses
	 *
	 * N.B., "busses" is an archaic spelling of "buses".  In new code
	 * "buses" is preferred.
	 */
	TAILQ_HEAD(,cam_eb)	xpt_busses;
	u_int			bus_generation;

	/* Boot-time delay/holdoff machinery; boot_delay is a tunable (sysctl). */
	int			boot_delay;
	struct callout 	boot_callout;
	struct task		boot_task;
	struct root_hold_token	xpt_rootmount;

	struct mtx		xpt_topo_lock;	/* protects the bus topology lists */
	struct taskqueue	*xpt_taskq;
};
      
      typedef enum {
              DM_RET_COPY                = 0x01,
              DM_RET_FLAG_MASK        = 0x0f,
              DM_RET_NONE                = 0x00,
              DM_RET_STOP                = 0x10,
              DM_RET_DESCEND                = 0x20,
              DM_RET_ERROR                = 0x30,
              DM_RET_ACTION_MASK        = 0xf0
      } dev_match_ret;
      
      typedef enum {
              XPT_DEPTH_BUS,
              XPT_DEPTH_TARGET,
              XPT_DEPTH_DEVICE,
              XPT_DEPTH_PERIPH
      } xpt_traverse_depth;
      
      struct xpt_traverse_config {
              xpt_traverse_depth        depth;
              void                        *tr_func;
              void                        *tr_arg;
      };
      
      typedef        int        xpt_busfunc_t (struct cam_eb *bus, void *arg);
      typedef        int        xpt_targetfunc_t (struct cam_et *target, void *arg);
      typedef        int        xpt_devicefunc_t (struct cam_ed *device, void *arg);
      typedef        int        xpt_periphfunc_t (struct cam_periph *periph, void *arg);
      typedef int        xpt_pdrvfunc_t (struct periph_driver **pdrv, void *arg);
      
/* Transport layer configuration information */
static struct xpt_softc xsoftc;

MTX_SYSINIT(xpt_topo_init, &xsoftc.xpt_topo_lock, "XPT topology lock", MTX_DEF);

/* Tunables/knobs exported under kern.cam. */
SYSCTL_INT(_kern_cam, OID_AUTO, boot_delay, CTLFLAG_RDTUN,
	   &xsoftc.boot_delay, 0, "Bus registration wait time");
SYSCTL_UINT(_kern_cam, OID_AUTO, xpt_generation, CTLFLAG_RD,
	    &xsoftc.xpt_generation, 0, "CAM peripheral generation count");
SYSCTL_INT(_kern_cam, OID_AUTO, announce_nosbuf, CTLFLAG_RWTUN,
	    &xsoftc.announce_nosbuf, 0, "Don't use sbuf for announcements");

/* One completion queue per worker; cam_doneqs is sized for MAXCPU. */
struct cam_doneq {
	struct mtx_padalign	cam_doneq_mtx;
	STAILQ_HEAD(, ccb_hdr)	cam_doneq;
	int			cam_doneq_sleep;	/* worker is (about to be) asleep */
};

static struct cam_doneq cam_doneqs[MAXCPU];
static int cam_num_doneqs;
static struct proc *cam_proc;

SYSCTL_INT(_kern_cam, OID_AUTO, num_doneqs, CTLFLAG_RDTUN,
	   &cam_num_doneqs, 0, "Number of completion queues/threads");

struct cam_periph *xpt_periph;

static periph_init_t xpt_periph_init;

/* The "xpt" peripheral driver itself; attaches early (CAM_PERIPH_DRV_EARLY). */
static struct periph_driver xpt_driver =
{
	xpt_periph_init, "xpt",
	TAILQ_HEAD_INITIALIZER(xpt_driver.units), /* generation */ 0,
	CAM_PERIPH_DRV_EARLY
};

PERIPHDRIVER_DECLARE(xpt, xpt_driver);

static d_open_t xptopen;
static d_close_t xptclose;
static d_ioctl_t xptioctl;
static d_ioctl_t xptdoioctl;

/* Character-device entry points for /dev/xpt0. */
static struct cdevsw xpt_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	0,
	.d_open =	xptopen,
	.d_close =	xptclose,
	.d_ioctl =	xptioctl,
	.d_name =	"xpt",
};

/* Storage for debugging datastructures */
struct cam_path *cam_dpath;
u_int32_t __read_mostly cam_dflags = CAM_DEBUG_FLAGS;
SYSCTL_UINT(_kern_cam, OID_AUTO, dflags, CTLFLAG_RWTUN,
	&cam_dflags, 0, "Enabled debug flags");
u_int32_t cam_debug_delay = CAM_DEBUG_DELAY;
SYSCTL_UINT(_kern_cam, OID_AUTO, debug_delay, CTLFLAG_RWTUN,
	&cam_debug_delay, 0, "Delay in us after each debug message");
      
/* Our boot-time initialization hook */
static int cam_module_event_handler(module_t, int /*modeventtype_t*/, void *);

/* Module registration: "cam" initializes at SI_SUB_CONFIGURE time. */
static moduledata_t cam_moduledata = {
	"cam",
	cam_module_event_handler,
	NULL
};

static int	xpt_init(void *);

DECLARE_MODULE(cam, cam_moduledata, SI_SUB_CONFIGURE, SI_ORDER_SECOND);
MODULE_VERSION(cam, 1);
      
/* Forward declarations for functions private to the XPT layer. */
static void		xpt_async_bcast(struct async_list *async_head,
					u_int32_t async_code,
					struct cam_path *path,
					void *async_arg);
static path_id_t xptnextfreepathid(void);
static path_id_t xptpathid(const char *sim_name, int sim_unit, int sim_bus);
static union ccb *xpt_get_ccb(struct cam_periph *periph);
static union ccb *xpt_get_ccb_nowait(struct cam_periph *periph);
static void	 xpt_run_allocq(struct cam_periph *periph, int sleep);
static void	 xpt_run_allocq_task(void *context, int pending);
static void	 xpt_run_devq(struct cam_devq *devq);
static callout_func_t xpt_release_devq_timeout;
static void	 xpt_acquire_bus(struct cam_eb *bus);
static void	 xpt_release_bus(struct cam_eb *bus);
static uint32_t	 xpt_freeze_devq_device(struct cam_ed *dev, u_int count);
static int	 xpt_release_devq_device(struct cam_ed *dev, u_int count,
		    int run_queue);
static struct cam_et*
		 xpt_alloc_target(struct cam_eb *bus, target_id_t target_id);
static void	 xpt_acquire_target(struct cam_et *target);
static void	 xpt_release_target(struct cam_et *target);
static struct cam_eb*
		 xpt_find_bus(path_id_t path_id);
static struct cam_et*
		 xpt_find_target(struct cam_eb *bus, target_id_t target_id);
static struct cam_ed*
		 xpt_find_device(struct cam_et *target, lun_id_t lun_id);
static void	 xpt_config(void *arg);
static void	 xpt_hold_boot_locked(void);
static int	 xpt_schedule_dev(struct camq *queue, cam_pinfo *dev_pinfo,
				 u_int32_t new_priority);
static xpt_devicefunc_t xptpassannouncefunc;
static void	 xptaction(struct cam_sim *sim, union ccb *work_ccb);
static void	 xptpoll(struct cam_sim *sim);
static void	 camisr_runqueue(void);
static void	 xpt_done_process(struct ccb_hdr *ccb_h);
static void	 xpt_done_td(void *);
static dev_match_ret	xptbusmatch(struct dev_match_pattern *patterns,
				    u_int num_patterns, struct cam_eb *bus);
static dev_match_ret	xptdevicematch(struct dev_match_pattern *patterns,
				       u_int num_patterns,
				       struct cam_ed *device);
static dev_match_ret	xptperiphmatch(struct dev_match_pattern *patterns,
				       u_int num_patterns,
				       struct cam_periph *periph);
static xpt_busfunc_t	xptedtbusfunc;
static xpt_targetfunc_t	xptedttargetfunc;
static xpt_devicefunc_t	xptedtdevicefunc;
static xpt_periphfunc_t	xptedtperiphfunc;
static xpt_pdrvfunc_t	xptplistpdrvfunc;
static xpt_periphfunc_t	xptplistperiphfunc;
static int		xptedtmatch(struct ccb_dev_match *cdm);
static int		xptperiphlistmatch(struct ccb_dev_match *cdm);
static int		xptbustraverse(struct cam_eb *start_bus,
				       xpt_busfunc_t *tr_func, void *arg);
static int		xpttargettraverse(struct cam_eb *bus,
					  struct cam_et *start_target,
					  xpt_targetfunc_t *tr_func, void *arg);
static int		xptdevicetraverse(struct cam_et *target,
					  struct cam_ed *start_device,
					  xpt_devicefunc_t *tr_func, void *arg);
static int		xptperiphtraverse(struct cam_ed *device,
					  struct cam_periph *start_periph,
					  xpt_periphfunc_t *tr_func, void *arg);
static int		xptpdrvtraverse(struct periph_driver **start_pdrv,
					xpt_pdrvfunc_t *tr_func, void *arg);
static int		xptpdperiphtraverse(struct periph_driver **pdrv,
					    struct cam_periph *start_periph,
					    xpt_periphfunc_t *tr_func,
					    void *arg);
static xpt_busfunc_t	xptdefbusfunc;
static xpt_targetfunc_t	xptdeftargetfunc;
static xpt_devicefunc_t	xptdefdevicefunc;
static xpt_periphfunc_t	xptdefperiphfunc;
static void		xpt_finishconfig_task(void *context, int pending);
static void		xpt_dev_async_default(u_int32_t async_code,
					      struct cam_eb *bus,
					      struct cam_et *target,
					      struct cam_ed *device,
					      void *async_arg);
static struct cam_ed *	xpt_alloc_device_default(struct cam_eb *bus,
						 struct cam_et *target,
						 lun_id_t lun_id);
static xpt_devicefunc_t	xptsetasyncfunc;
static xpt_busfunc_t	xptsetasyncbusfunc;
static cam_status	xptregister(struct cam_periph *periph,
				    void *arg);
      
      static __inline int
      xpt_schedule_devq(struct cam_devq *devq, struct cam_ed *dev)
      {
              int        retval;
      
              mtx_assert(&devq->send_mtx, MA_OWNED);
 3162         if ((dev->ccbq.queue.entries > 0) &&
                  (dev->ccbq.dev_openings > 0) &&
                  (dev->ccbq.queue.qfrozen_cnt == 0)) {
                      /*
                       * The priority of a device waiting for controller
                       * resources is that of the highest priority CCB
                       * enqueued.
                       */
                      retval =
 3162                     xpt_schedule_dev(&devq->send_queue,
                                           &dev->devq_entry,
                                           CAMQ_GET_PRIO(&dev->ccbq.queue));
              } else {
                      retval = 0;
              }
              return (retval);
      }
      
/* Return non-zero iff the device is currently linked onto a queue. */
static __inline int
device_is_queued(struct cam_ed *device)
{
	/* CAM_UNQUEUED_INDEX marks an entry that is not on any queue. */
	return (device->devq_entry.index != CAM_UNQUEUED_INDEX);
}
      
/*
 * Periph-driver init hook: create the /dev/xpt0 control device node,
 * root-owned and operator-readable/writable (0600).
 */
static void
xpt_periph_init(void)
{
	make_dev(&xpt_cdevsw, 0, UID_ROOT, GID_OPERATOR, 0600, "xpt0");
}
      
      static int
      xptopen(struct cdev *dev, int flags, int fmt, struct thread *td)
      {
      
              /*
               * Only allow read-write access.
               */
              if (((flags & FWRITE) == 0) || ((flags & FREAD) == 0))
                      return(EPERM);
      
              /*
               * We don't allow nonblocking access.
               */
              if ((flags & O_NONBLOCK) != 0) {
                      printf("%s: can't do nonblocking access\n", devtoname(dev));
                      return(ENODEV);
              }
      
              return(0);
      }
      
      static int
      xptclose(struct cdev *dev, int flag, int fmt, struct thread *td)
      {
      
              return(0);
      }
      
      /*
       * Don't automatically grab the xpt softc lock here even though this is going
       * through the xpt device.  The xpt device is really just a back door for
       * accessing other devices and SIMs, so the right thing to do is to grab
       * the appropriate SIM lock once the bus/SIM is located.
       */
      static int
      xptioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td)
      {
              int error;
      
              if ((error = xptdoioctl(dev, cmd, addr, flag, td)) == ENOTTY) {
                      error = cam_compat_ioctl(dev, cmd, addr, flag, td, xptdoioctl);
              }
              return (error);
      }
      
/*
 * Worker for the xpt control-device ioctls.  Handles CAMIOCOMMAND (submit
 * selected CCB types directly through the transport) and CAMGETPASSTHRU
 * (map a periph instance to its "pass" driver sibling).  Returns ENOTTY
 * for unrecognized commands so xptioctl() can retry via the compat shim.
 */
static int
xptdoioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td)
{
	int error;

	error = 0;

	switch(cmd) {
	/*
	 * For the transport layer CAMIOCOMMAND ioctl, we really only want
	 * to accept CCB types that don't quite make sense to send through a
	 * passthrough driver. XPT_PATH_INQ is an exception to this, as stated
	 * in the CAM spec.
	 */
	case CAMIOCOMMAND: {
		union ccb *ccb;
		union ccb *inccb;
		struct cam_eb *bus;

		inccb = (union ccb *)addr;
#if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING)
		if (inccb->ccb_h.func_code == XPT_SCSI_IO)
			inccb->csio.bio = NULL;
#endif

		if (inccb->ccb_h.flags & CAM_UNLOCKED)
			return (EINVAL);

		/* Holds a bus reference until the end of this case. */
		bus = xpt_find_bus(inccb->ccb_h.path_id);
		if (bus == NULL)
			return (EINVAL);

		/* Validate wildcard usage per CCB type before dispatching. */
		switch (inccb->ccb_h.func_code) {
		case XPT_SCAN_BUS:
		case XPT_RESET_BUS:
			if (inccb->ccb_h.target_id != CAM_TARGET_WILDCARD ||
			    inccb->ccb_h.target_lun != CAM_LUN_WILDCARD) {
				xpt_release_bus(bus);
				return (EINVAL);
			}
			break;
		case XPT_SCAN_TGT:
			if (inccb->ccb_h.target_id == CAM_TARGET_WILDCARD ||
			    inccb->ccb_h.target_lun != CAM_LUN_WILDCARD) {
				xpt_release_bus(bus);
				return (EINVAL);
			}
			break;
		default:
			break;
		}

		switch(inccb->ccb_h.func_code) {
		case XPT_SCAN_BUS:
		case XPT_RESET_BUS:
		case XPT_PATH_INQ:
		case XPT_ENG_INQ:
		case XPT_SCAN_LUN:
		case XPT_SCAN_TGT:

			ccb = xpt_alloc_ccb();

			/*
			 * Create a path using the bus, target, and lun the
			 * user passed in.
			 */
			if (xpt_create_path(&ccb->ccb_h.path, NULL,
					    inccb->ccb_h.path_id,
					    inccb->ccb_h.target_id,
					    inccb->ccb_h.target_lun) !=
					    CAM_REQ_CMP){
				error = EINVAL;
				xpt_free_ccb(ccb);
				break;
			}
			/* Ensure all of our fields are correct */
			xpt_setup_ccb(&ccb->ccb_h, ccb->ccb_h.path,
				      inccb->ccb_h.pinfo.priority);
			xpt_merge_ccb(ccb, inccb);
			xpt_path_lock(ccb->ccb_h.path);
			cam_periph_runccb(ccb, NULL, 0, 0, NULL);
			xpt_path_unlock(ccb->ccb_h.path);
			/* Copy the result (including status) back to the caller. */
			bcopy(ccb, inccb, sizeof(union ccb));
			xpt_free_path(ccb->ccb_h.path);
			xpt_free_ccb(ccb);
			break;

		case XPT_DEBUG: {
			union ccb ccb;

			/*
			 * This is an immediate CCB, so it's okay to
			 * allocate it on the stack.
			 */

			/*
			 * Create a path using the bus, target, and lun the
			 * user passed in.
			 */
			if (xpt_create_path(&ccb.ccb_h.path, NULL,
					    inccb->ccb_h.path_id,
					    inccb->ccb_h.target_id,
					    inccb->ccb_h.target_lun) !=
					    CAM_REQ_CMP){
				error = EINVAL;
				break;
			}
			/* Ensure all of our fields are correct */
			xpt_setup_ccb(&ccb.ccb_h, ccb.ccb_h.path,
				      inccb->ccb_h.pinfo.priority);
			xpt_merge_ccb(&ccb, inccb);
			xpt_action(&ccb);
			bcopy(&ccb, inccb, sizeof(union ccb));
			xpt_free_path(ccb.ccb_h.path);
			break;
		}
		case XPT_DEV_MATCH: {
			struct cam_periph_map_info mapinfo;
			struct cam_path *old_path;

			/*
			 * We can't deal with physical addresses for this
			 * type of transaction.
			 */
			if ((inccb->ccb_h.flags & CAM_DATA_MASK) !=
			    CAM_DATA_VADDR) {
				error = EINVAL;
				break;
			}

			/*
			 * Save this in case the caller had it set to
			 * something in particular.
			 */
			old_path = inccb->ccb_h.path;

			/*
			 * We really don't need a path for the matching
			 * code.  The path is needed because of the
			 * debugging statements in xpt_action().  They
			 * assume that the CCB has a valid path.
			 */
			inccb->ccb_h.path = xpt_periph->path;

			bzero(&mapinfo, sizeof(mapinfo));

			/*
			 * Map the pattern and match buffers into kernel
			 * virtual address space.
			 */
			error = cam_periph_mapmem(inccb, &mapinfo, MAXPHYS);

			if (error) {
				inccb->ccb_h.path = old_path;
				break;
			}

			/*
			 * This is an immediate CCB, we can send it on directly.
			 */
			xpt_action(inccb);

			/*
			 * Map the buffers back into user space.
			 */
			cam_periph_unmapmem(inccb, &mapinfo);

			inccb->ccb_h.path = old_path;

			error = 0;
			break;
		}
		default:
			error = ENOTSUP;
			break;
		}
		xpt_release_bus(bus);
		break;
	}
	/*
	 * This is the getpassthru ioctl. It takes a XPT_GDEVLIST ccb as input,
	 * with the periphal driver name and unit name filled in.  The other
	 * fields don't really matter as input.  The passthrough driver name
	 * ("pass"), and unit number are passed back in the ccb.  The current
	 * device generation number, and the index into the device peripheral
	 * driver list, and the status are also passed back.  Note that
	 * since we do everything in one pass, unlike the XPT_GDEVLIST ccb,
	 * we never return a status of CAM_GDEVLIST_LIST_CHANGED.  It is
	 * (or rather should be) impossible for the device peripheral driver
	 * list to change since we look at the whole thing in one pass, and
	 * we do it with lock protection.
	 *
	 */
	case CAMGETPASSTHRU: {
		union ccb *ccb;
		struct cam_periph *periph;
		struct periph_driver **p_drv;
		char   *name;
		u_int unit;
		int base_periph_found;

		ccb = (union ccb *)addr;
		unit = ccb->cgdl.unit_number;
		name = ccb->cgdl.periph_name;
		base_periph_found = 0;
#if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING)
		if (ccb->ccb_h.func_code == XPT_SCSI_IO)
			ccb->csio.bio = NULL;
#endif

		/*
		 * Sanity check -- make sure we don't get a null peripheral
		 * driver name.
		 */
		if (*ccb->cgdl.periph_name == '\0') {
			error = EINVAL;
			break;
		}

		/* Keep the list from changing while we traverse it */
		xpt_lock_buses();

		/* first find our driver in the list of drivers */
		for (p_drv = periph_drivers; *p_drv != NULL; p_drv++)
			if (strcmp((*p_drv)->driver_name, name) == 0)
				break;

		if (*p_drv == NULL) {
			xpt_unlock_buses();
			ccb->ccb_h.status = CAM_REQ_CMP_ERR;
			ccb->cgdl.status = CAM_GDEVLIST_ERROR;
			*ccb->cgdl.periph_name = '\0';
			ccb->cgdl.unit_number = 0;
			error = ENOENT;
			break;
		}

		/*
		 * Run through every peripheral instance of this driver
		 * and check to see whether it matches the unit passed
		 * in by the user.  If it does, get out of the loops and
		 * find the passthrough driver associated with that
		 * peripheral driver.
		 */
		for (periph = TAILQ_FIRST(&(*p_drv)->units); periph != NULL;
		     periph = TAILQ_NEXT(periph, unit_links)) {
			if (periph->unit_number == unit)
				break;
		}
		/*
		 * If we found the peripheral driver that the user passed
		 * in, go through all of the peripheral drivers for that
		 * particular device and look for a passthrough driver.
		 */
		if (periph != NULL) {
			struct cam_ed *device;
			int i;

			base_periph_found = 1;
			device = periph->path->device;
			for (i = 0, periph = SLIST_FIRST(&device->periphs);
			     periph != NULL;
			     periph = SLIST_NEXT(periph, periph_links), i++) {
				/*
				 * Check to see whether we have a
				 * passthrough device or not.
				 */
				if (strcmp(periph->periph_name, "pass") == 0) {
					/*
					 * Fill in the getdevlist fields.
					 */
					strlcpy(ccb->cgdl.periph_name,
					       periph->periph_name,
					       sizeof(ccb->cgdl.periph_name));
					ccb->cgdl.unit_number =
						periph->unit_number;
					if (SLIST_NEXT(periph, periph_links))
						ccb->cgdl.status =
							CAM_GDEVLIST_MORE_DEVS;
					else
						ccb->cgdl.status =
						       CAM_GDEVLIST_LAST_DEVICE;
					ccb->cgdl.generation =
						device->generation;
					ccb->cgdl.index = i;
					/*
					 * Fill in some CCB header fields
					 * that the user may want.
					 */
					ccb->ccb_h.path_id =
						periph->path->bus->path_id;
					ccb->ccb_h.target_id =
						periph->path->target->target_id;
					ccb->ccb_h.target_lun =
						periph->path->device->lun_id;
					ccb->ccb_h.status = CAM_REQ_CMP;
					break;
				}
			}
		}

		/*
		 * If the periph is null here, one of two things has
		 * happened.  The first possibility is that we couldn't
		 * find the unit number of the particular peripheral driver
		 * that the user is asking about.  e.g. the user asks for
		 * the passthrough driver for "da11".  We find the list of
		 * "da" peripherals all right, but there is no unit 11.
		 * The other possibility is that we went through the list
		 * of peripheral drivers attached to the device structure,
		 * but didn't find one with the name "pass".  Either way,
		 * we return ENOENT, since we couldn't find something.
		 */
		if (periph == NULL) {
			ccb->ccb_h.status = CAM_REQ_CMP_ERR;
			ccb->cgdl.status = CAM_GDEVLIST_ERROR;
			*ccb->cgdl.periph_name = '\0';
			ccb->cgdl.unit_number = 0;
			error = ENOENT;
			/*
			 * It is unfortunate that this is even necessary,
			 * but there are many, many clueless users out there.
			 * If this is true, the user is looking for the
			 * passthrough driver, but doesn't have one in his
			 * kernel.
			 */
			if (base_periph_found == 1) {
				printf("xptioctl: pass driver is not in the "
				       "kernel\n");
				printf("xptioctl: put \"device pass\" in "
				       "your kernel config file\n");
			}
		}
		xpt_unlock_buses();
		break;
		}
	default:
		error = ENOTTY;
		break;
	}

	return(error);
}
      
      static int
      cam_module_event_handler(module_t mod, int what, void *arg)
      {
              int error;
      
              switch (what) {
              case MOD_LOAD:
                      if ((error = xpt_init(NULL)) != 0)
                              return (error);
                      break;
              case MOD_UNLOAD:
                      return EBUSY;
              default:
                      return EOPNOTSUPP;
              }
      
              return 0;
      }
      
/*
 * Look up the transport protocol descriptor registered for 'proto' in the
 * cam_xpt_proto linker set.  Returns NULL if no entry matches.
 */
static struct xpt_proto *
xpt_proto_find(cam_proto proto)
{
	struct xpt_proto **pp;

	SET_FOREACH(pp, cam_xpt_proto_set) {
		if ((*pp)->proto == proto)
			return *pp;
	}

	return NULL;
}
      
      static void
      xpt_rescan_done(struct cam_periph *periph, union ccb *done_ccb)
      {
      
              if (done_ccb->ccb_h.ppriv_ptr1 == NULL) {
                      xpt_free_path(done_ccb->ccb_h.path);
                      xpt_free_ccb(done_ccb);
              } else {
                      done_ccb->ccb_h.cbfcnp = done_ccb->ccb_h.ppriv_ptr1;
                      (*done_ccb->ccb_h.cbfcnp)(periph, done_ccb);
              }
              xpt_release_boot();
      }
      
/*
 * Thread to handle bus rescans.  Runs forever, sleeping on the scan queue
 * (protected by the topology lock) until xpt_rescan() enqueues a CCB and
 * wakes us, then dispatches each queued CCB via xpt_action() under the
 * path mutex.
 */
static void
xpt_scanner_thread(void *dummy)
{
	union ccb	*ccb;
	struct mtx	*mtx;
	struct cam_ed	*device;

	xpt_lock_buses();
	for (;;) {
		/* Sleep only when the queue is empty; a wakeup may be spurious. */
		if (TAILQ_EMPTY(&xsoftc.ccb_scanq))
			msleep(&xsoftc.ccb_scanq, &xsoftc.xpt_topo_lock, PRIBIO,
			       "-", 0);
		if ((ccb = (union ccb *)TAILQ_FIRST(&xsoftc.ccb_scanq)) != NULL) {
			/* Dequeue and drop the topology lock before acting. */
			TAILQ_REMOVE(&xsoftc.ccb_scanq, &ccb->ccb_h, sim_links.tqe);
			xpt_unlock_buses();

			/*
			 * We need to lock the device's mutex which we use as
			 * the path mutex. We can't do it directly because the
			 * cam_path in the ccb may wind up going away because
			 * the path lock may be dropped and the path retired in
			 * the completion callback. We do this directly to keep
			 * the reference counts in cam_path sane. We also have
			 * to copy the device pointer because ccb_h.path may
			 * be freed in the callback.
			 */
			mtx = xpt_path_mtx(ccb->ccb_h.path);
			device = ccb->ccb_h.path->device;
			xpt_acquire_device(device);
			mtx_lock(mtx);
			xpt_action(ccb);
			mtx_unlock(mtx);
			xpt_release_device(device);

			xpt_lock_buses();
		}
	}
}
      
/*
 * Queue a rescan request for the scanner thread.  The scan granularity
 * (bus, target, or lun) is derived from how much of the CCB's path is
 * wildcarded.  On the error path and on duplicate suppression, this
 * function consumes (frees) both the path and the CCB.
 */
void
xpt_rescan(union ccb *ccb)
{
	struct ccb_hdr *hdr;

	/* Prepare request: pick the func_code from the path's wildcards. */
	if (ccb->ccb_h.path->target->target_id == CAM_TARGET_WILDCARD &&
	    ccb->ccb_h.path->device->lun_id == CAM_LUN_WILDCARD)
		ccb->ccb_h.func_code = XPT_SCAN_BUS;
	else if (ccb->ccb_h.path->target->target_id != CAM_TARGET_WILDCARD &&
	    ccb->ccb_h.path->device->lun_id == CAM_LUN_WILDCARD)
		ccb->ccb_h.func_code = XPT_SCAN_TGT;
	else if (ccb->ccb_h.path->target->target_id != CAM_TARGET_WILDCARD &&
	    ccb->ccb_h.path->device->lun_id != CAM_LUN_WILDCARD)
		ccb->ccb_h.func_code = XPT_SCAN_LUN;
	else {
		/* Wildcard target with a specific lun is not a valid scan. */
		xpt_print(ccb->ccb_h.path, "illegal scan path\n");
		xpt_free_path(ccb->ccb_h.path);
		xpt_free_ccb(ccb);
		return;
	}
	CAM_DEBUG(ccb->ccb_h.path, CAM_DEBUG_TRACE,
	    ("xpt_rescan: func %#x %s\n", ccb->ccb_h.func_code,
		 xpt_action_name(ccb->ccb_h.func_code)));

	/*
	 * Stash the caller's completion callback so xpt_rescan_done() can
	 * restore it, then install our own.
	 */
	ccb->ccb_h.ppriv_ptr1 = ccb->ccb_h.cbfcnp;
	ccb->ccb_h.cbfcnp = xpt_rescan_done;
	xpt_setup_ccb(&ccb->ccb_h, ccb->ccb_h.path, CAM_PRIORITY_XPT);
	/* Don't make duplicate entries for the same paths. */
	xpt_lock_buses();
	if (ccb->ccb_h.ppriv_ptr1 == NULL) {
		TAILQ_FOREACH(hdr, &xsoftc.ccb_scanq, sim_links.tqe) {
			if (xpt_path_comp(hdr->path, ccb->ccb_h.path) == 0) {
				/* Already queued: wake the scanner and drop ours. */
				wakeup(&xsoftc.ccb_scanq);
				xpt_unlock_buses();
				xpt_print(ccb->ccb_h.path, "rescan already queued\n");
				xpt_free_path(ccb->ccb_h.path);
				xpt_free_ccb(ccb);
				return;
			}
		}
	}
	/* Hold boot (while the topology lock is held) until the scan completes. */
	TAILQ_INSERT_TAIL(&xsoftc.ccb_scanq, &ccb->ccb_h, sim_links.tqe);
	xpt_hold_boot_locked();
	wakeup(&xsoftc.ccb_scanq);
	xpt_unlock_buses();
}
      
      /* Functions accessed by the peripheral drivers */
/*
 * One-time initialization of the CAM transport layer, called from the
 * module load handler.  Sets up the global queues and locks, creates the
 * XPT's own SIM and bus, registers the XPT as a peripheral driver on
 * that bus, starts the completion-queue threads, and schedules the bus
 * probe (xpt_config) to run once interrupts are enabled.
 *
 * Returns 0 on success or an errno value on failure.
 *
 * NOTE(review): on the failure paths below, previously allocated
 * resources (devq, xpt_sim, taskqueue) are not torn down — confirm this
 * is acceptable for a boot-time-only failure.
 */
static int
xpt_init(void *dummy)
{
	struct cam_sim *xpt_sim;
	struct cam_path *path;
	struct cam_devq *devq;
	cam_status status;
	int error, i;

	TAILQ_INIT(&xsoftc.xpt_busses);
	TAILQ_INIT(&xsoftc.ccb_scanq);
	STAILQ_INIT(&xsoftc.highpowerq);
	xsoftc.num_highpower = CAM_MAX_HIGHPOWER;

	mtx_init(&xsoftc.xpt_highpower_lock, "XPT highpower lock", NULL, MTX_DEF);
	xsoftc.xpt_taskq = taskqueue_create("CAM XPT task", M_WAITOK,
	    taskqueue_thread_enqueue, /*context*/&xsoftc.xpt_taskq);

#ifdef CAM_BOOT_DELAY
	/*
	 * Override this value at compile time to assist our users
	 * who don't use loader to boot a kernel.
	 */
	xsoftc.boot_delay = CAM_BOOT_DELAY;
#endif

	/*
	 * The xpt layer is, itself, the equivalent of a SIM.
	 * Allow 16 ccbs in the ccb pool for it.  This should
	 * give decent parallelism when we probe buses and
	 * perform other XPT functions.
	 */
	devq = cam_simq_alloc(16);
	xpt_sim = cam_sim_alloc(xptaction,
				xptpoll,
				"xpt",
				/*softc*/NULL,
				/*unit*/0,
				/*mtx*/NULL,
				/*max_dev_transactions*/0,
				/*max_tagged_dev_transactions*/0,
				devq);
	if (xpt_sim == NULL)
		return (ENOMEM);

	if ((status = xpt_bus_register(xpt_sim, NULL, 0)) != CAM_SUCCESS) {
		printf("xpt_init: xpt_bus_register failed with status %#x,"
		       " failing attach\n", status);
		return (EINVAL);
	}

	/*
	 * Looking at the XPT from the SIM layer, the XPT is
	 * the equivalent of a peripheral driver.  Allocate
	 * a peripheral driver entry for us.
	 */
	if ((status = xpt_create_path(&path, NULL, CAM_XPT_PATH_ID,
				      CAM_TARGET_WILDCARD,
				      CAM_LUN_WILDCARD)) != CAM_REQ_CMP) {
		printf("xpt_init: xpt_create_path failed with status %#x,"
		       " failing attach\n", status);
		return (EINVAL);
	}
	xpt_path_lock(path);
	cam_periph_alloc(xptregister, NULL, NULL, NULL, "xpt", CAM_PERIPH_BIO,
			 path, NULL, 0, xpt_sim);
	xpt_path_unlock(path);
	xpt_free_path(path);

	/*
	 * Size the completion-queue pool: default to one queue per six
	 * CPUs (at least one), and clamp any tuned value to MAXCPU.
	 */
	if (cam_num_doneqs < 1)
		cam_num_doneqs = 1 + mp_ncpus / 6;
	else if (cam_num_doneqs > MAXCPU)
		cam_num_doneqs = MAXCPU;
	for (i = 0; i < cam_num_doneqs; i++) {
		mtx_init(&cam_doneqs[i].cam_doneq_mtx, "CAM doneq", NULL,
		    MTX_DEF);
		STAILQ_INIT(&cam_doneqs[i].cam_doneq);
		error = kproc_kthread_add(xpt_done_td, &cam_doneqs[i],
		    &cam_proc, NULL, 0, 0, "cam", "doneq%d", i);
		if (error != 0) {
			/* Keep only the queues whose threads actually started. */
			cam_num_doneqs = i;
			break;
		}
	}
	if (cam_num_doneqs < 1) {
		printf("xpt_init: Cannot init completion queues "
		       "- failing attach\n");
		return (ENOMEM);
	}

	/*
	 * Register a callback for when interrupts are enabled.
	 */
	config_intrhook_oneshot(xpt_config, NULL);

	return (0);
}
      
      static cam_status
      xptregister(struct cam_periph *periph, void *arg)
      {
              struct cam_sim *xpt_sim;
      
              if (periph == NULL) {
                      printf("xptregister: periph was NULL!!\n");
                      return(CAM_REQ_CMP_ERR);
              }
      
              xpt_sim = (struct cam_sim *)arg;
              xpt_sim->softc = periph;
              xpt_periph = periph;
              periph->softc = NULL;
      
              return(CAM_REQ_CMP);
      }
      
      int32_t
      xpt_add_periph(struct cam_periph *periph)
      {
              struct cam_ed *device;
              int32_t         status;
      
              TASK_INIT(&periph->periph_run_task, 0, xpt_run_allocq_task, periph);
              device = periph->path->device;
              status = CAM_REQ_CMP;
              if (device != NULL) {
                      mtx_lock(&device->target->bus->eb_mtx);
                      device->generation++;
                      SLIST_INSERT_HEAD(&device->periphs, periph, periph_links);
                      mtx_unlock(&device->target->bus->eb_mtx);
                      atomic_add_32(&xsoftc.xpt_generation, 1);
              }
      
              return (status);
      }
      
      void
      xpt_remove_periph(struct cam_periph *periph)
      {
              struct cam_ed *device;
      
              device = periph->path->device;
              if (device != NULL) {
                      mtx_lock(&device->target->bus->eb_mtx);
                      device->generation++;
                      SLIST_REMOVE(&device->periphs, periph, cam_periph, periph_links);
                      mtx_unlock(&device->target->bus->eb_mtx);
                      atomic_add_32(&xsoftc.xpt_generation, 1);
              }
      }
      
/*
 * Print the standard attach-time announcement for a peripheral to the
 * console: location, protocol-specific details, serial number, transport
 * details, command queueing status, and an optional caller-supplied
 * string.  Caller must hold the periph (path) lock.
 */
void
xpt_announce_periph(struct cam_periph *periph, char *announce_string)
{
	struct	cam_path *path = periph->path;
	struct	xpt_proto *proto;

	cam_periph_assert(periph, MA_OWNED);
	periph->flags |= CAM_PERIPH_ANNOUNCED;

	printf("%s%d at %s%d bus %d scbus%d target %d lun %jx\n",
	       periph->periph_name, periph->unit_number,
	       path->bus->sim->sim_name,
	       path->bus->sim->unit_number,
	       path->bus->sim->bus_id,
	       path->bus->path_id,
	       path->target->target_id,
	       (uintmax_t)path->device->lun_id);
	printf("%s%d: ", periph->periph_name, periph->unit_number);
	/* Protocol-specific announcement, if a handler is compiled in. */
	proto = xpt_proto_find(path->device->protocol);
	if (proto)
		proto->ops->announce(path->device);
	else
		printf("%s%d: Unknown protocol device %d\n",
		    periph->periph_name, periph->unit_number,
		    path->device->protocol);
	if (path->device->serial_num_len > 0) {
		/* Don't wrap the screen  - print only the first 60 chars */
		printf("%s%d: Serial Number %.60s\n", periph->periph_name,
		       periph->unit_number, path->device->serial_num);
	}
	/* Announce transport details. */
	path->bus->xport->ops->announce(periph);
	/* Announce command queueing. */
	if (path->device->inq_flags & SID_CmdQue
	 || path->device->flags & CAM_DEV_TAG_AFTER_COUNT) {
		printf("%s%d: Command Queueing enabled\n",
		       periph->periph_name, periph->unit_number);
	}
	/* Announce caller's details if they've passed in. */
	if (announce_string != NULL)
		printf("%s%d: %s\n", periph->periph_name,
		       periph->unit_number, announce_string);
}
      
/*
 * sbuf-based variant of xpt_announce_periph(): emit the same attach-time
 * announcement into 'sb' instead of the console.  Falls back to the
 * printf variant when sbuf announcements are administratively disabled
 * or when the protocol/transport lacks an sbuf handler.  Caller must
 * hold the periph (path) lock.
 */
void
xpt_announce_periph_sbuf(struct cam_periph *periph, struct sbuf *sb,
    char *announce_string)
{
	struct	cam_path *path = periph->path;
	struct	xpt_proto *proto;

	cam_periph_assert(periph, MA_OWNED);
	periph->flags |= CAM_PERIPH_ANNOUNCED;

	/* Fall back to the non-sbuf method if necessary */
	if (xsoftc.announce_nosbuf != 0) {
		xpt_announce_periph(periph, announce_string);
		return;
	}
	proto = xpt_proto_find(path->device->protocol);
	if (((proto != NULL) && (proto->ops->announce_sbuf == NULL)) ||
	    (path->bus->xport->ops->announce_sbuf == NULL)) {
		xpt_announce_periph(periph, announce_string);
		return;
	}

	sbuf_printf(sb, "%s%d at %s%d bus %d scbus%d target %d lun %jx\n",
	    periph->periph_name, periph->unit_number,
	    path->bus->sim->sim_name,
	    path->bus->sim->unit_number,
	    path->bus->sim->bus_id,
	    path->bus->path_id,
	    path->target->target_id,
	    (uintmax_t)path->device->lun_id);
	sbuf_printf(sb, "%s%d: ", periph->periph_name, periph->unit_number);

	/* Protocol-specific announcement, if a handler is compiled in. */
	if (proto)
		proto->ops->announce_sbuf(path->device, sb);
	else
		sbuf_printf(sb, "%s%d: Unknown protocol device %d\n",
		    periph->periph_name, periph->unit_number,
		    path->device->protocol);
	if (path->device->serial_num_len > 0) {
		/* Don't wrap the screen  - print only the first 60 chars */
		sbuf_printf(sb, "%s%d: Serial Number %.60s\n",
		    periph->periph_name, periph->unit_number,
		    path->device->serial_num);
	}
	/* Announce transport details. */
	path->bus->xport->ops->announce_sbuf(periph, sb);
	/* Announce command queueing. */
	if (path->device->inq_flags & SID_CmdQue
	 || path->device->flags & CAM_DEV_TAG_AFTER_COUNT) {
		sbuf_printf(sb, "%s%d: Command Queueing enabled\n",
		    periph->periph_name, periph->unit_number);
	}
	/* Announce caller's details if they've passed in. */
	if (announce_string != NULL)
		sbuf_printf(sb, "%s%d: %s\n", periph->periph_name,
		    periph->unit_number, announce_string);
}
      
      void
      xpt_announce_quirks(struct cam_periph *periph, int quirks, char *bit_string)
      {
              if (quirks != 0) {
                      printf("%s%d: quirks=0x%b\n", periph->periph_name,
                          periph->unit_number, quirks, bit_string);
              }
      }
      
      void
      xpt_announce_quirks_sbuf(struct cam_periph *periph, struct sbuf *sb,
                               int quirks, char *bit_string)
      {
              if (xsoftc.announce_nosbuf != 0) {
                      xpt_announce_quirks(periph, quirks, bit_string);
                      return;
              }
      
              if (quirks != 0) {
                      sbuf_printf(sb, "%s%d: quirks=0x%b\n", periph->periph_name,
                          periph->unit_number, quirks, bit_string);
              }
      }
      
/*
 * Print the detach-time announcement for a peripheral to the console:
 * location, protocol-specific details, serial number, and a trailing
 * " detached" line.  Caller must hold the periph (path) lock.
 */
void
xpt_denounce_periph(struct cam_periph *periph)
{
	struct	cam_path *path = periph->path;
	struct	xpt_proto *proto;

	cam_periph_assert(periph, MA_OWNED);
	printf("%s%d at %s%d bus %d scbus%d target %d lun %jx\n",
	       periph->periph_name, periph->unit_number,
	       path->bus->sim->sim_name,
	       path->bus->sim->unit_number,
	       path->bus->sim->bus_id,
	       path->bus->path_id,
	       path->target->target_id,
	       (uintmax_t)path->device->lun_id);
	printf("%s%d: ", periph->periph_name, periph->unit_number);
	/* Protocol-specific denounce, if a handler is compiled in. */
	proto = xpt_proto_find(path->device->protocol);
	if (proto)
		proto->ops->denounce(path->device);
	else
		printf("%s%d: Unknown protocol device %d\n",
		    periph->periph_name, periph->unit_number,
		    path->device->protocol);
	if (path->device->serial_num_len > 0)
		printf(" s/n %.60s", path->device->serial_num);
	printf(" detached\n");
}
      
/*
 * sbuf-based variant of xpt_denounce_periph(): emit the detach-time
 * announcement into 'sb' instead of the console.  Falls back to the
 * printf variant when sbuf announcements are administratively disabled
 * or when the protocol lacks an sbuf handler.  Caller must hold the
 * periph (path) lock.
 */
void
xpt_denounce_periph_sbuf(struct cam_periph *periph, struct sbuf *sb)
{
	struct cam_path *path = periph->path;
	struct xpt_proto *proto;

	cam_periph_assert(periph, MA_OWNED);

	/* Fall back to the non-sbuf method if necessary */
	if (xsoftc.announce_nosbuf != 0) {
		xpt_denounce_periph(periph);
		return;
	}
	proto = xpt_proto_find(path->device->protocol);
	if ((proto != NULL) && (proto->ops->denounce_sbuf == NULL)) {
		xpt_denounce_periph(periph);
		return;
	}

	sbuf_printf(sb, "%s%d at %s%d bus %d scbus%d target %d lun %jx\n",
	    periph->periph_name, periph->unit_number,
	    path->bus->sim->sim_name,
	    path->bus->sim->unit_number,
	    path->bus->sim->bus_id,
	    path->bus->path_id,
	    path->target->target_id,
	    (uintmax_t)path->device->lun_id);
	sbuf_printf(sb, "%s%d: ", periph->periph_name, periph->unit_number);

	/* Protocol-specific denounce, if a handler is compiled in. */
	if (proto)
		proto->ops->denounce_sbuf(path->device, sb);
	else
		sbuf_printf(sb, "%s%d: Unknown protocol device %d\n",
		    periph->periph_name, periph->unit_number,
		    path->device->protocol);
	if (path->device->serial_num_len > 0)
		sbuf_printf(sb, " s/n %.60s", path->device->serial_num);
	sbuf_printf(sb, " detached\n");
}
      
/*
 * GEOM attribute fetch for a CAM device: look up "GEOM::ident",
 * "GEOM::physpath", "GEOM::lunid", or "GEOM::lunname" via an
 * XPT_DEV_ADVINFO CCB and render the result as a NUL-terminated string
 * into 'buf' (capacity 'len').  Caller must hold the path lock.
 *
 * NOTE(review): the return convention is mixed — -1 for "no attribute",
 * 0 for success, and positive errno values (ENOMEM/EFAULT) for errors;
 * confirm callers treat any non-zero value as failure.
 */
int
xpt_getattr(char *buf, size_t len, const char *attr, struct cam_path *path)
{
	int ret = -1, l, o;
	struct ccb_dev_advinfo cdai;
	struct scsi_vpd_device_id *did;
	struct scsi_vpd_id_descriptor *idd;

	xpt_path_assert(path, MA_OWNED);

	memset(&cdai, 0, sizeof(cdai));
	xpt_setup_ccb(&cdai.ccb_h, path, CAM_PRIORITY_NORMAL);
	cdai.ccb_h.func_code = XPT_DEV_ADVINFO;
	cdai.flags = CDAI_FLAG_NONE;
	cdai.bufsiz = len;
	cdai.buf = buf;

	if (!strcmp(attr, "GEOM::ident"))
		cdai.buftype = CDAI_TYPE_SERIAL_NUM;
	else if (!strcmp(attr, "GEOM::physpath"))
		cdai.buftype = CDAI_TYPE_PHYS_PATH;
	else if (strcmp(attr, "GEOM::lunid") == 0 ||
		 strcmp(attr, "GEOM::lunname") == 0) {
		cdai.buftype = CDAI_TYPE_SCSI_DEVID;
		cdai.bufsiz = CAM_SCSI_DEVID_MAXLEN;
		/* Device-ID VPD data needs a scratch buffer; freed at 'out'. */
		cdai.buf = malloc(cdai.bufsiz, M_CAMXPT, M_NOWAIT);
		if (cdai.buf == NULL) {
			ret = ENOMEM;
			goto out;
		}
	} else
		goto out;

	xpt_action((union ccb *)&cdai); /* can only be synchronous */
	if ((cdai.ccb_h.status & CAM_DEV_QFRZN) != 0)
		cam_release_devq(cdai.ccb_h.path, 0, 0, 0, FALSE);
	if (cdai.provsiz == 0)
		goto out;
	switch(cdai.buftype) {
	case CDAI_TYPE_SCSI_DEVID:
		did = (struct scsi_vpd_device_id *)cdai.buf;
		/*
		 * For "GEOM::lunid", prefer descriptors in decreasing order
		 * of identifier quality: NAA, EUI-64, UUID, MD5.  Both
		 * attributes fall back to T10 vendor ID and then the name
		 * descriptor.
		 */
		if (strcmp(attr, "GEOM::lunid") == 0) {
			idd = scsi_get_devid(did, cdai.provsiz,
			    scsi_devid_is_lun_naa);
			if (idd == NULL)
				idd = scsi_get_devid(did, cdai.provsiz,
				    scsi_devid_is_lun_eui64);
			if (idd == NULL)
				idd = scsi_get_devid(did, cdai.provsiz,
				    scsi_devid_is_lun_uuid);
			if (idd == NULL)
				idd = scsi_get_devid(did, cdai.provsiz,
				    scsi_devid_is_lun_md5);
		} else
			idd = NULL;

		if (idd == NULL)
			idd = scsi_get_devid(did, cdai.provsiz,
			    scsi_devid_is_lun_t10);
		if (idd == NULL)
			idd = scsi_get_devid(did, cdai.provsiz,
			    scsi_devid_is_lun_name);
		if (idd == NULL)
			break;

		ret = 0;
		if ((idd->proto_codeset & SVPD_ID_CODESET_MASK) ==
		    SVPD_ID_CODESET_ASCII) {
			/* Copy ASCII identifier, mapping NULs to spaces. */
			if (idd->length < len) {
				for (l = 0; l < idd->length; l++)
					buf[l] = idd->identifier[l] ?
					    idd->identifier[l] : ' ';
				buf[l] = 0;
			} else
				ret = EFAULT;
			break;
		}
		if ((idd->proto_codeset & SVPD_ID_CODESET_MASK) ==
		    SVPD_ID_CODESET_UTF8) {
			l = strnlen(idd->identifier, idd->length);
			if (l < len) {
				bcopy(idd->identifier, buf, l);
				buf[l] = 0;
			} else
				ret = EFAULT;
			break;
		}
		if ((idd->id_type & SVPD_ID_TYPE_MASK) ==
		    SVPD_ID_TYPE_UUID && idd->identifier[0] == 0x10) {
			/*
			 * RFC 4122-style rendering: hex bytes with dashes
			 * after the 4th, 6th, 8th, and 10th UUID byte
			 * (identifier bytes 0-1 are a header, hence l = 2).
			 */
			if ((idd->length - 2) * 2 + 4 >= len) {
				ret = EFAULT;
				break;
			}
			for (l = 2, o = 0; l < idd->length; l++) {
				if (l == 6 || l == 8 || l == 10 || l == 12)
				    o += sprintf(buf + o, "-");
				o += sprintf(buf + o, "%02x",
				    idd->identifier[l]);
			}
			break;
		}
		/* Binary identifier: render as plain hex. */
		if (idd->length * 2 < len) {
			for (l = 0; l < idd->length; l++)
				sprintf(buf + l * 2, "%02x",
				    idd->identifier[l]);
		} else
				ret = EFAULT;
		break;
	default:
		/* Serial number / physical path: already in 'buf'; terminate. */
		if (cdai.provsiz < len) {
			cdai.buf[cdai.provsiz] = 0;
			ret = 0;
		} else
			ret = EFAULT;
		break;
	}

out:
	/* Free the devid scratch buffer if one was allocated. */
	if ((char *)cdai.buf != buf)
		free(cdai.buf, M_CAMXPT);
	return ret;
}
      
/*
 * Match a bus against an array of user-supplied match patterns for the
 * XPT_DEV_MATCH ioctl.  Returns a dev_match_ret bitmask combining an
 * action (none/descend/stop/error) with DM_RET_COPY when the bus's data
 * should be copied out to the user.
 */
static dev_match_ret
xptbusmatch(struct dev_match_pattern *patterns, u_int num_patterns,
	    struct cam_eb *bus)
{
	dev_match_ret retval;
	u_int i;

	retval = DM_RET_NONE;

	/*
	 * If we aren't given something to match against, that's an error.
	 */
	if (bus == NULL)
		return(DM_RET_ERROR);

	/*
	 * If there are no match entries, then this bus matches no
	 * matter what.
	 */
	if ((patterns == NULL) || (num_patterns == 0))
		return(DM_RET_DESCEND | DM_RET_COPY);

	for (i = 0; i < num_patterns; i++) {
		struct bus_match_pattern *cur_pattern;

		/*
		 * If the pattern in question isn't for a bus node, we
		 * aren't interested.  However, we do indicate to the
		 * calling routine that we should continue descending the
		 * tree, since the user wants to match against lower-level
		 * EDT elements.
		 */
		if (patterns[i].type != DEV_MATCH_BUS) {
			if ((retval & DM_RET_ACTION_MASK) == DM_RET_NONE)
				retval |= DM_RET_DESCEND;
			continue;
		}

		cur_pattern = &patterns[i].pattern.bus_pattern;

		/*
		 * If they want to match any bus node, we give them any
		 * device node.
		 */
		if (cur_pattern->flags == BUS_MATCH_ANY) {
			/* set the copy flag */
			retval |= DM_RET_COPY;

			/*
			 * If we've already decided on an action, go ahead
			 * and return.
			 */
			if ((retval & DM_RET_ACTION_MASK) != DM_RET_NONE)
				return(retval);
		}

		/*
		 * Not sure why someone would do this...
		 */
		if (cur_pattern->flags == BUS_MATCH_NONE)
			continue;

		/* Each requested criterion below must match to accept the bus. */
		if (((cur_pattern->flags & BUS_MATCH_PATH) != 0)
		 && (cur_pattern->path_id != bus->path_id))
			continue;

		if (((cur_pattern->flags & BUS_MATCH_BUS_ID) != 0)
		 && (cur_pattern->bus_id != bus->sim->bus_id))
			continue;

		if (((cur_pattern->flags & BUS_MATCH_UNIT) != 0)
		 && (cur_pattern->unit_number != bus->sim->unit_number))
			continue;

		if (((cur_pattern->flags & BUS_MATCH_NAME) != 0)
		 && (strncmp(cur_pattern->dev_name, bus->sim->sim_name,
			     DEV_IDLEN) != 0))
			continue;

		/*
		 * If we get to this point, the user definitely wants
		 * information on this bus.  So tell the caller to copy the
		 * data out.
		 */
		retval |= DM_RET_COPY;

		/*
		 * If the return action has been set to descend, then we
		 * know that we've already seen a non-bus matching
		 * expression, therefore we need to further descend the tree.
		 * This won't change by continuing around the loop, so we
		 * go ahead and return.  If we haven't seen a non-bus
		 * matching expression, we keep going around the loop until
		 * we exhaust the matching expressions.  We'll set the stop
		 * flag once we fall out of the loop.
		 */
		if ((retval & DM_RET_ACTION_MASK) == DM_RET_DESCEND)
			return(retval);
	}

	/*
	 * If the return action hasn't been set to descend yet, that means
	 * we haven't seen anything other than bus matching patterns.  So
	 * tell the caller to stop descending the tree -- the user doesn't
	 * want to match against lower level tree elements.
	 */
	if ((retval & DM_RET_ACTION_MASK) == DM_RET_NONE)
		retval |= DM_RET_STOP;

	return(retval);
}
      
/*
 * Match a single EDT device node against an array of match patterns.
 *
 * The result is a dev_match_ret bitmask: the DM_RET_ACTION_MASK bits
 * carry an action (NONE, STOP, DESCEND, or ERROR) and DM_RET_COPY is
 * set when the caller should copy this device's data out to the user.
 */
static dev_match_ret
xptdevicematch(struct dev_match_pattern *patterns, u_int num_patterns,
	       struct cam_ed *device)
{
	dev_match_ret retval;
	u_int i;

	retval = DM_RET_NONE;

	/*
	 * If we aren't given something to match against, that's an error.
	 */
	if (device == NULL)
		return(DM_RET_ERROR);

	/*
	 * If there are no match entries, then this device matches no
	 * matter what.
	 */
	if ((patterns == NULL) || (num_patterns == 0))
		return(DM_RET_DESCEND | DM_RET_COPY);

	for (i = 0; i < num_patterns; i++) {
		struct device_match_pattern *cur_pattern;
		struct scsi_vpd_device_id *device_id_page;

		/*
		 * If the pattern in question isn't for a device node, we
		 * aren't interested.  A peripheral pattern anywhere in the
		 * list does mean the caller should keep descending the
		 * tree, so record DESCEND in that case.
		 */
		if (patterns[i].type != DEV_MATCH_DEVICE) {
			if ((patterns[i].type == DEV_MATCH_PERIPH)
			 && ((retval & DM_RET_ACTION_MASK) == DM_RET_NONE))
				retval |= DM_RET_DESCEND;
			continue;
		}

		cur_pattern = &patterns[i].pattern.device_pattern;

		/* Error out if mutually exclusive options are specified. */
		if ((cur_pattern->flags & (DEV_MATCH_INQUIRY|DEV_MATCH_DEVID))
		 == (DEV_MATCH_INQUIRY|DEV_MATCH_DEVID))
			return(DM_RET_ERROR);

		/*
		 * If they want to match any device node, we give them any
		 * device node.
		 */
		if (cur_pattern->flags == DEV_MATCH_ANY)
			goto copy_dev_node;

		/*
		 * Not sure why someone would do this...
		 */
		if (cur_pattern->flags == DEV_MATCH_NONE)
			continue;

		/* Each check below skips this pattern on any mismatch. */
		if (((cur_pattern->flags & DEV_MATCH_PATH) != 0)
		 && (cur_pattern->path_id != device->target->bus->path_id))
			continue;

		if (((cur_pattern->flags & DEV_MATCH_TARGET) != 0)
		 && (cur_pattern->target_id != device->target->target_id))
			continue;

		if (((cur_pattern->flags & DEV_MATCH_LUN) != 0)
		 && (cur_pattern->target_lun != device->lun_id))
			continue;

		if (((cur_pattern->flags & DEV_MATCH_INQUIRY) != 0)
		 && (cam_quirkmatch((caddr_t)&device->inq_data,
				    (caddr_t)&cur_pattern->data.inq_pat,
				    1, sizeof(cur_pattern->data.inq_pat),
				    scsi_static_inquiry_match) == NULL))
			continue;

		/*
		 * DEVID matching: the stored page must be at least a full
		 * VPD device ID header before its descriptor list can be
		 * compared against the pattern.
		 */
		device_id_page = (struct scsi_vpd_device_id *)device->device_id;
		if (((cur_pattern->flags & DEV_MATCH_DEVID) != 0)
		 && (device->device_id_len < SVPD_DEVICE_ID_HDR_LEN
		  || scsi_devid_match((uint8_t *)device_id_page->desc_list,
				      device->device_id_len
				    - SVPD_DEVICE_ID_HDR_LEN,
				      cur_pattern->data.devid_pat.id,
				      cur_pattern->data.devid_pat.id_len) != 0))
			continue;

copy_dev_node:
		/*
		 * If we get to this point, the user definitely wants
		 * information on this device.  So tell the caller to copy
		 * the data out.
		 */
		retval |= DM_RET_COPY;

		/*
		 * If the return action has been set to descend, then we
		 * know that we've already seen a peripheral matching
		 * expression, therefore we need to further descend the tree.
		 * This won't change by continuing around the loop, so we
		 * go ahead and return.  If we haven't seen a peripheral
		 * matching expression, we keep going around the loop until
		 * we exhaust the matching expressions.  We'll set the stop
		 * flag once we fall out of the loop.
		 */
		if ((retval & DM_RET_ACTION_MASK) == DM_RET_DESCEND)
			return(retval);
	}

	/*
	 * If the return action hasn't been set to descend yet, that means
	 * we haven't seen any peripheral matching patterns.  So tell the
	 * caller to stop descending the tree -- the user doesn't want to
	 * match against lower level tree elements.
	 */
	if ((retval & DM_RET_ACTION_MASK) == DM_RET_NONE)
		retval |= DM_RET_STOP;

	return(retval);
}
      
      /*
       * Match a single peripheral against any number of match patterns.
       */
      static dev_match_ret
      xptperiphmatch(struct dev_match_pattern *patterns, u_int num_patterns,
                     struct cam_periph *periph)
      {
              dev_match_ret retval;
              u_int i;
      
              /*
               * If we aren't given something to match against, that's an error.
               */
              if (periph == NULL)
                      return(DM_RET_ERROR);
      
              /*
               * If there are no match entries, then this peripheral matches no
               * matter what.
               */
              if ((patterns == NULL) || (num_patterns == 0))
                      return(DM_RET_STOP | DM_RET_COPY);
      
              /*
               * There aren't any nodes below a peripheral node, so there's no
               * reason to descend the tree any further.
               */
              retval = DM_RET_STOP;
      
              for (i = 0; i < num_patterns; i++) {
                      struct periph_match_pattern *cur_pattern;
      
                      /*
                       * If the pattern in question isn't for a peripheral, we
                       * aren't interested.
                       */
                      if (patterns[i].type != DEV_MATCH_PERIPH)
                              continue;
      
                      cur_pattern = &patterns[i].pattern.periph_pattern;
      
                      /*
                       * If they want to match on anything, then we will do so.
                       */
                      if (cur_pattern->flags == PERIPH_MATCH_ANY) {
                              /* set the copy flag */
                              retval |= DM_RET_COPY;
      
                              /*
                               * We've already set the return action to stop,
                               * since there are no nodes below peripherals in
                               * the tree.
                               */
                              return(retval);
                      }
      
                      /*
                       * Not sure why someone would do this...
                       */
                      if (cur_pattern->flags == PERIPH_MATCH_NONE)
                              continue;
      
                      if (((cur_pattern->flags & PERIPH_MATCH_PATH) != 0)
                       && (cur_pattern->path_id != periph->path->bus->path_id))
                              continue;
      
                      /*
                       * For the target and lun id's, we have to make sure the
                       * target and lun pointers aren't NULL.  The xpt peripheral
                       * has a wildcard target and device.
                       */
                      if (((cur_pattern->flags & PERIPH_MATCH_TARGET) != 0)
                       && ((periph->path->target == NULL)
                       ||(cur_pattern->target_id != periph->path->target->target_id)))
                              continue;
      
                      if (((cur_pattern->flags & PERIPH_MATCH_LUN) != 0)
                       && ((periph->path->device == NULL)
                       || (cur_pattern->target_lun != periph->path->device->lun_id)))
                              continue;
      
                      if (((cur_pattern->flags & PERIPH_MATCH_UNIT) != 0)
                       && (cur_pattern->unit_number != periph->unit_number))
                              continue;
      
                      if (((cur_pattern->flags & PERIPH_MATCH_NAME) != 0)
                       && (strncmp(cur_pattern->periph_name, periph->periph_name,
                                   DEV_IDLEN) != 0))
                              continue;
      
                      /*
                       * If we get to this point, the user definitely wants
                       * information on this peripheral.  So tell the caller to
                       * copy the data out.
                       */
                      retval |= DM_RET_COPY;
      
                      /*
                       * The return action has already been set to stop, since
                       * peripherals don't have any nodes below them in the EDT.
                       */
                      return(retval);
              }
      
              /*
               * If we get to this point, the peripheral that was passed in
               * doesn't match any of the patterns.
               */
              return(retval);
      }
      
/*
 * EDT traversal callback for buses: match one bus against the user's
 * patterns and, if requested, copy a bus result record out.  Returns 1
 * to continue the traversal into this bus's targets, 0 to stop (error,
 * list changed, or output buffer full -- cdm->status is set in each of
 * those cases).
 */
static int
xptedtbusfunc(struct cam_eb *bus, void *arg)
{
	struct ccb_dev_match *cdm;
	struct cam_et *target;
	dev_match_ret retval;

	cdm = (struct ccb_dev_match *)arg;

	/*
	 * If our position is for something deeper in the tree, that means
	 * that we've already seen this node.  So, we keep going down.
	 */
	if ((cdm->pos.position_type & CAM_DEV_POS_BUS)
	 && (cdm->pos.cookie.bus == bus)
	 && (cdm->pos.position_type & CAM_DEV_POS_TARGET)
	 && (cdm->pos.cookie.target != NULL))
		retval = DM_RET_DESCEND;
	else
		retval = xptbusmatch(cdm->patterns, cdm->num_patterns, bus);

	/*
	 * If we got an error, bail out of the search.
	 */
	if ((retval & DM_RET_ACTION_MASK) == DM_RET_ERROR) {
		cdm->status = CAM_DEV_MATCH_ERROR;
		return(0);
	}

	/*
	 * If the copy flag is set, copy this bus out.
	 */
	if (retval & DM_RET_COPY) {
		/*
		 * NOTE(review): spaceleft is signed but is compared with
		 * sizeof() (unsigned) below; this is only safe because
		 * num_matches * sizeof(...) never exceeds match_buf_len
		 * under this check-before-write pattern -- confirm before
		 * reusing the idiom elsewhere.
		 */
		int spaceleft, j;

		spaceleft = cdm->match_buf_len - (cdm->num_matches *
			sizeof(struct dev_match_result));

		/*
		 * If we don't have enough space to put in another
		 * match result, save our position and tell the
		 * user there are more devices to check.
		 */
		if (spaceleft < sizeof(struct dev_match_result)) {
			bzero(&cdm->pos, sizeof(cdm->pos));
			cdm->pos.position_type =
				CAM_DEV_POS_EDT | CAM_DEV_POS_BUS;

			cdm->pos.cookie.bus = bus;
			cdm->pos.generations[CAM_BUS_GENERATION]=
				xsoftc.bus_generation;
			cdm->status = CAM_DEV_MATCH_MORE;
			return(0);
		}
		/* Fill in one dev_match_result slot for this bus. */
		j = cdm->num_matches;
		cdm->num_matches++;
		cdm->matches[j].type = DEV_MATCH_BUS;
		cdm->matches[j].result.bus_result.path_id = bus->path_id;
		cdm->matches[j].result.bus_result.bus_id = bus->sim->bus_id;
		cdm->matches[j].result.bus_result.unit_number =
			bus->sim->unit_number;
		strlcpy(cdm->matches[j].result.bus_result.dev_name,
			bus->sim->sim_name,
			sizeof(cdm->matches[j].result.bus_result.dev_name));
	}

	/*
	 * If the user is only interested in buses, there's no
	 * reason to descend to the next level in the tree.
	 */
	if ((retval & DM_RET_ACTION_MASK) == DM_RET_STOP)
		return(1);

	/*
	 * If there is a target generation recorded, check it to
	 * make sure the target list hasn't changed.
	 */
	mtx_lock(&bus->eb_mtx);
	if ((cdm->pos.position_type & CAM_DEV_POS_BUS)
	 && (cdm->pos.cookie.bus == bus)
	 && (cdm->pos.position_type & CAM_DEV_POS_TARGET)
	 && (cdm->pos.cookie.target != NULL)) {
		if ((cdm->pos.generations[CAM_TARGET_GENERATION] !=
		    bus->generation)) {
			mtx_unlock(&bus->eb_mtx);
			cdm->status = CAM_DEV_MATCH_LIST_CHANGED;
			return (0);
		}
		/* Resume at the saved target; take a ref while locked. */
		target = (struct cam_et *)cdm->pos.cookie.target;
		target->refcount++;
	} else
		target = NULL;
	mtx_unlock(&bus->eb_mtx);

	return (xpttargettraverse(bus, target, xptedttargetfunc, arg));
}
      
      static int
      xptedttargetfunc(struct cam_et *target, void *arg)
      {
              struct ccb_dev_match *cdm;
              struct cam_eb *bus;
              struct cam_ed *device;
      
              cdm = (struct ccb_dev_match *)arg;
              bus = target->bus;
      
              /*
               * If there is a device list generation recorded, check it to
               * make sure the device list hasn't changed.
               */
              mtx_lock(&bus->eb_mtx);
              if ((cdm->pos.position_type & CAM_DEV_POS_BUS)
               && (cdm->pos.cookie.bus == bus)
               && (cdm->pos.position_type & CAM_DEV_POS_TARGET)
               && (cdm->pos.cookie.target == target)
               && (cdm->pos.position_type & CAM_DEV_POS_DEVICE)
               && (cdm->pos.cookie.device != NULL)) {
                      if (cdm->pos.generations[CAM_DEV_GENERATION] !=
                          target->generation) {
                              mtx_unlock(&bus->eb_mtx);
                              cdm->status = CAM_DEV_MATCH_LIST_CHANGED;
                              return(0);
                      }
                      device = (struct cam_ed *)cdm->pos.cookie.device;
                      device->refcount++;
              } else
                      device = NULL;
              mtx_unlock(&bus->eb_mtx);
      
              return (xptdevicetraverse(target, device, xptedtdevicefunc, arg));
      }
      
/*
 * EDT traversal callback for devices: match one device against the
 * user's patterns and, if requested, copy a device result record out.
 * Returns 1 to continue into this device's peripheral list, 0 to stop
 * (error, list changed, or output buffer full -- cdm->status is set in
 * each of those cases).
 */
static int
xptedtdevicefunc(struct cam_ed *device, void *arg)
{
	struct cam_eb *bus;
	struct cam_periph *periph;
	struct ccb_dev_match *cdm;
	dev_match_ret retval;

	cdm = (struct ccb_dev_match *)arg;
	bus = device->target->bus;

	/*
	 * If our position is for something deeper in the tree, that means
	 * that we've already seen this node.  So, we keep going down.
	 */
	if ((cdm->pos.position_type & CAM_DEV_POS_DEVICE)
	 && (cdm->pos.cookie.device == device)
	 && (cdm->pos.position_type & CAM_DEV_POS_PERIPH)
	 && (cdm->pos.cookie.periph != NULL))
		retval = DM_RET_DESCEND;
	else
		retval = xptdevicematch(cdm->patterns, cdm->num_patterns,
					device);

	/* A match error aborts the whole search. */
	if ((retval & DM_RET_ACTION_MASK) == DM_RET_ERROR) {
		cdm->status = CAM_DEV_MATCH_ERROR;
		return(0);
	}

	/*
	 * If the copy flag is set, copy this device out.
	 */
	if (retval & DM_RET_COPY) {
		int spaceleft, j;

		spaceleft = cdm->match_buf_len - (cdm->num_matches *
			sizeof(struct dev_match_result));

		/*
		 * If we don't have enough space to put in another
		 * match result, save our position and tell the
		 * user there are more devices to check.
		 */
		if (spaceleft < sizeof(struct dev_match_result)) {
			bzero(&cdm->pos, sizeof(cdm->pos));
			cdm->pos.position_type =
				CAM_DEV_POS_EDT | CAM_DEV_POS_BUS |
				CAM_DEV_POS_TARGET | CAM_DEV_POS_DEVICE;

			cdm->pos.cookie.bus = device->target->bus;
			cdm->pos.generations[CAM_BUS_GENERATION]=
				xsoftc.bus_generation;
			cdm->pos.cookie.target = device->target;
			cdm->pos.generations[CAM_TARGET_GENERATION] =
				device->target->bus->generation;
			cdm->pos.cookie.device = device;
			cdm->pos.generations[CAM_DEV_GENERATION] =
				device->target->generation;
			cdm->status = CAM_DEV_MATCH_MORE;
			return(0);
		}
		/* Fill in one dev_match_result slot for this device. */
		j = cdm->num_matches;
		cdm->num_matches++;
		cdm->matches[j].type = DEV_MATCH_DEVICE;
		cdm->matches[j].result.device_result.path_id =
			device->target->bus->path_id;
		cdm->matches[j].result.device_result.target_id =
			device->target->target_id;
		cdm->matches[j].result.device_result.target_lun =
			device->lun_id;
		cdm->matches[j].result.device_result.protocol =
			device->protocol;
		bcopy(&device->inq_data,
		      &cdm->matches[j].result.device_result.inq_data,
		      sizeof(struct scsi_inquiry_data));
		bcopy(&device->ident_data,
		      &cdm->matches[j].result.device_result.ident_data,
		      sizeof(struct ata_params));

		/* Let the user know whether this device is unconfigured */
		if (device->flags & CAM_DEV_UNCONFIGURED)
			cdm->matches[j].result.device_result.flags =
				DEV_RESULT_UNCONFIGURED;
		else
			cdm->matches[j].result.device_result.flags =
				DEV_RESULT_NOFLAG;
	}

	/*
	 * If the user isn't interested in peripherals, don't descend
	 * the tree any further.
	 */
	if ((retval & DM_RET_ACTION_MASK) == DM_RET_STOP)
		return(1);

	/*
	 * If there is a peripheral list generation recorded, make sure
	 * it hasn't changed.
	 *
	 * Lock order here: xpt_lock_buses() is taken before the per-bus
	 * eb_mtx, and both are dropped in reverse order.
	 */
	xpt_lock_buses();
	mtx_lock(&bus->eb_mtx);
	if ((cdm->pos.position_type & CAM_DEV_POS_BUS)
	 && (cdm->pos.cookie.bus == bus)
	 && (cdm->pos.position_type & CAM_DEV_POS_TARGET)
	 && (cdm->pos.cookie.target == device->target)
	 && (cdm->pos.position_type & CAM_DEV_POS_DEVICE)
	 && (cdm->pos.cookie.device == device)
	 && (cdm->pos.position_type & CAM_DEV_POS_PERIPH)
	 && (cdm->pos.cookie.periph != NULL)) {
		if (cdm->pos.generations[CAM_PERIPH_GENERATION] !=
		    device->generation) {
			mtx_unlock(&bus->eb_mtx);
			xpt_unlock_buses();
			cdm->status = CAM_DEV_MATCH_LIST_CHANGED;
			return(0);
		}
		/* Resume at the saved periph; take a ref while locked. */
		periph = (struct cam_periph *)cdm->pos.cookie.periph;
		periph->refcount++;
	} else
		periph = NULL;
	mtx_unlock(&bus->eb_mtx);
	xpt_unlock_buses();

	return (xptperiphtraverse(device, periph, xptedtperiphfunc, arg));
}
      
      static int
      xptedtperiphfunc(struct cam_periph *periph, void *arg)
      {
              struct ccb_dev_match *cdm;
              dev_match_ret retval;
      
              cdm = (struct ccb_dev_match *)arg;
      
              retval = xptperiphmatch(cdm->patterns, cdm->num_patterns, periph);
      
              if ((retval & DM_RET_ACTION_MASK) == DM_RET_ERROR) {
                      cdm->status = CAM_DEV_MATCH_ERROR;
                      return(0);
              }
      
              /*
               * If the copy flag is set, copy this peripheral out.
               */
              if (retval & DM_RET_COPY) {
                      int spaceleft, j;
                      size_t l;
      
                      spaceleft = cdm->match_buf_len - (cdm->num_matches *
                              sizeof(struct dev_match_result));
      
                      /*
                       * If we don't have enough space to put in another
                       * match result, save our position and tell the
                       * user there are more devices to check.
                       */
                      if (spaceleft < sizeof(struct dev_match_result)) {
                              bzero(&cdm->pos, sizeof(cdm->pos));
                              cdm->pos.position_type =
                                      CAM_DEV_POS_EDT | CAM_DEV_POS_BUS |
                                      CAM_DEV_POS_TARGET | CAM_DEV_POS_DEVICE |
                                      CAM_DEV_POS_PERIPH;
      
                              cdm->pos.cookie.bus = periph->path->bus;
                              cdm->pos.generations[CAM_BUS_GENERATION]=
                                      xsoftc.bus_generation;
                              cdm->pos.cookie.target = periph->path->target;
                              cdm->pos.generations[CAM_TARGET_GENERATION] =
                                      periph->path->bus->generation;
                              cdm->pos.cookie.device = periph->path->device;
                              cdm->pos.generations[CAM_DEV_GENERATION] =
                                      periph->path->target->generation;
                              cdm->pos.cookie.periph = periph;
                              cdm->pos.generations[CAM_PERIPH_GENERATION] =
                                      periph->path->device->generation;
                              cdm->status = CAM_DEV_MATCH_MORE;
                              return(0);
                      }
      
                      j = cdm->num_matches;
                      cdm->num_matches++;
                      cdm->matches[j].type = DEV_MATCH_PERIPH;
                      cdm->matches[j].result.periph_result.path_id =
                              periph->path->bus->path_id;
                      cdm->matches[j].result.periph_result.target_id =
                              periph->path->target->target_id;
                      cdm->matches[j].result.periph_result.target_lun =
                              periph->path->device->lun_id;
                      cdm->matches[j].result.periph_result.unit_number =
                              periph->unit_number;
                      l = sizeof(cdm->matches[j].result.periph_result.periph_name);
                      strlcpy(cdm->matches[j].result.periph_result.periph_name,
                              periph->periph_name, l);
              }
      
              return(1);
      }
      
      static int
      xptedtmatch(struct ccb_dev_match *cdm)
      {
              struct cam_eb *bus;
              int ret;
      
              cdm->num_matches = 0;
      
              /*
               * Check the bus list generation.  If it has changed, the user
               * needs to reset everything and start over.
               */
              xpt_lock_buses();
              if ((cdm->pos.position_type & CAM_DEV_POS_BUS)
               && (cdm->pos.cookie.bus != NULL)) {
                      if (cdm->pos.generations[CAM_BUS_GENERATION] !=
                          xsoftc.bus_generation) {
                              xpt_unlock_buses();
                              cdm->status = CAM_DEV_MATCH_LIST_CHANGED;
                              return(0);
                      }
                      bus = (struct cam_eb *)cdm->pos.cookie.bus;
                      bus->refcount++;
              } else
                      bus = NULL;
              xpt_unlock_buses();
      
              ret = xptbustraverse(bus, xptedtbusfunc, cdm);
      
              /*
               * If we get back 0, that means that we had to stop before fully
               * traversing the EDT.  It also means that one of the subroutines
               * has set the status field to the proper value.  If we get back 1,
               * we've fully traversed the EDT and copied out any matching entries.
               */
              if (ret == 1)
                      cdm->status = CAM_DEV_MATCH_LAST;
      
              return(ret);
      }
      
      static int
      xptplistpdrvfunc(struct periph_driver **pdrv, void *arg)
      {
              struct cam_periph *periph;
              struct ccb_dev_match *cdm;
      
              cdm = (struct ccb_dev_match *)arg;
      
              xpt_lock_buses();
              if ((cdm->pos.position_type & CAM_DEV_POS_PDPTR)
               && (cdm->pos.cookie.pdrv == pdrv)
               && (cdm->pos.position_type & CAM_DEV_POS_PERIPH)
               && (cdm->pos.cookie.periph != NULL)) {
                      if (cdm->pos.generations[CAM_PERIPH_GENERATION] !=
                          (*pdrv)->generation) {
                              xpt_unlock_buses();
                              cdm->status = CAM_DEV_MATCH_LIST_CHANGED;
                              return(0);
                      }
                      periph = (struct cam_periph *)cdm->pos.cookie.periph;
                      periph->refcount++;
              } else
                      periph = NULL;
              xpt_unlock_buses();
      
              return (xptpdperiphtraverse(pdrv, periph, xptplistperiphfunc, arg));
      }
      
/*
 * Peripheral-driver-list traversal callback: match one peripheral and,
 * if requested, copy a peripheral result record out.  Returns 1 to
 * continue the traversal, 0 to stop (error or output buffer full --
 * cdm->status is set in both cases).
 */
static int
xptplistperiphfunc(struct cam_periph *periph, void *arg)
{
	struct ccb_dev_match *cdm;
	dev_match_ret retval;

	cdm = (struct ccb_dev_match *)arg;

	retval = xptperiphmatch(cdm->patterns, cdm->num_patterns, periph);

	/* A match error aborts the whole search. */
	if ((retval & DM_RET_ACTION_MASK) == DM_RET_ERROR) {
		cdm->status = CAM_DEV_MATCH_ERROR;
		return(0);
	}

	/*
	 * If the copy flag is set, copy this peripheral out.
	 */
	if (retval & DM_RET_COPY) {
		int spaceleft, j;
		size_t l;

		spaceleft = cdm->match_buf_len - (cdm->num_matches *
			sizeof(struct dev_match_result));

		/*
		 * If we don't have enough space to put in another
		 * match result, save our position and tell the
		 * user there are more devices to check.
		 */
		if (spaceleft < sizeof(struct dev_match_result)) {
			struct periph_driver **pdrv;

			pdrv = NULL;
			bzero(&cdm->pos, sizeof(cdm->pos));
			cdm->pos.position_type =
				CAM_DEV_POS_PDRV | CAM_DEV_POS_PDPTR |
				CAM_DEV_POS_PERIPH;

			/*
			 * This may look a bit non-sensical, but it is
			 * actually quite logical.  There are very few
			 * peripheral drivers, and bloating every peripheral
			 * structure with a pointer back to its parent
			 * peripheral driver linker set entry would cost
			 * more in the long run than doing this quick lookup.
			 */
			for (pdrv = periph_drivers; *pdrv != NULL; pdrv++) {
				if (strcmp((*pdrv)->driver_name,
				    periph->periph_name) == 0)
					break;
			}

			/* No linker-set entry matches this periph's name. */
			if (*pdrv == NULL) {
				cdm->status = CAM_DEV_MATCH_ERROR;
				return(0);
			}

			cdm->pos.cookie.pdrv = pdrv;
			/*
			 * The periph generation slot does double duty, as
			 * does the periph pointer slot.  They are used for
			 * both edt and pdrv lookups and positioning.
			 */
			cdm->pos.cookie.periph = periph;
			cdm->pos.generations[CAM_PERIPH_GENERATION] =
				(*pdrv)->generation;
			cdm->status = CAM_DEV_MATCH_MORE;
			return(0);
		}

		/* Fill in one dev_match_result slot for this peripheral. */
		j = cdm->num_matches;
		cdm->num_matches++;
		cdm->matches[j].type = DEV_MATCH_PERIPH;
		cdm->matches[j].result.periph_result.path_id =
			periph->path->bus->path_id;

		/*
		 * The transport layer peripheral doesn't have a target or
		 * lun.
		 */
		if (periph->path->target)
			cdm->matches[j].result.periph_result.target_id =
				periph->path->target->target_id;
		else
			cdm->matches[j].result.periph_result.target_id =
				CAM_TARGET_WILDCARD;

		if (periph->path->device)
			cdm->matches[j].result.periph_result.target_lun =
				periph->path->device->lun_id;
		else
			cdm->matches[j].result.periph_result.target_lun =
				CAM_LUN_WILDCARD;

		cdm->matches[j].result.periph_result.unit_number =
			periph->unit_number;
		l = sizeof(cdm->matches[j].result.periph_result.periph_name);
		strlcpy(cdm->matches[j].result.periph_result.periph_name,
			periph->periph_name, l);
	}

	return(1);
}
      
      static int
      xptperiphlistmatch(struct ccb_dev_match *cdm)
      {
              int ret;
      
              cdm->num_matches = 0;
      
              /*
               * At this point in the edt traversal function, we check the bus
               * list generation to make sure that no buses have been added or
               * removed since the user last sent a XPT_DEV_MATCH ccb through.
               * For the peripheral driver list traversal function, however, we
               * don't have to worry about new peripheral driver types coming or
               * going; they're in a linker set, and therefore can't change
               * without a recompile.
               */
      
              if ((cdm->pos.position_type & CAM_DEV_POS_PDPTR)
               && (cdm->pos.cookie.pdrv != NULL))
                      ret = xptpdrvtraverse(
                                      (struct periph_driver **)cdm->pos.cookie.pdrv,
                                      xptplistpdrvfunc, cdm);
              else
                      ret = xptpdrvtraverse(NULL, xptplistpdrvfunc, cdm);
      
              /*
               * If we get back 0, that means that we had to stop before fully
               * traversing the peripheral driver tree.  It also means that one of
               * the subroutines has set the status field to the proper value.  If
               * we get back 1, we've fully traversed the EDT and copied out any
               * matching entries.
               */
              if (ret == 1)
                      cdm->status = CAM_DEV_MATCH_LAST;
      
              return(ret);
      }
      
/*
 * Invoke tr_func() on start_bus (or, when start_bus is NULL, on every bus
 * currently on xsoftc.xpt_busses) until tr_func() returns 0 or the list is
 * exhausted.  Each bus is pinned with an extra refcount, taken under the
 * bus-list lock, before tr_func() runs without that lock held; the next
 * bus is pinned before the current one is released so the walk survives
 * concurrent list changes.  Returns the last value tr_func() produced
 * (1 if the list was empty and tr_func() never ran).
 *
 * NOTE(review): when start_bus is non-NULL, the caller appears expected to
 * already hold a reference on it, since the loop releases each bus it
 * visits -- confirm against callers.
 */
static int
xptbustraverse(struct cam_eb *start_bus, xpt_busfunc_t *tr_func, void *arg)
{
	struct cam_eb *bus, *next_bus;
	int retval;

	retval = 1;
	if (start_bus)
		bus = start_bus;
	else {
		/* Pin the first bus under the bus-list lock. */
		xpt_lock_buses();
		bus = TAILQ_FIRST(&xsoftc.xpt_busses);
		if (bus == NULL) {
			xpt_unlock_buses();
			return (retval);
		}
		bus->refcount++;
		xpt_unlock_buses();
	}
	for (; bus != NULL; bus = next_bus) {
		/* Callback runs without the bus-list lock held. */
		retval = tr_func(bus, arg);
		if (retval == 0) {
			xpt_release_bus(bus);
			break;
		}
		/* Pin the successor before dropping our hold on this bus. */
		xpt_lock_buses();
		next_bus = TAILQ_NEXT(bus, links);
		if (next_bus)
			next_bus->refcount++;
		xpt_unlock_buses();
		xpt_release_bus(bus);
	}
	return(retval);
}
      
/*
 * Invoke tr_func() on start_target (or, when start_target is NULL, on every
 * target attached to 'bus') until tr_func() returns 0 or the list is
 * exhausted.  Mirrors xptbustraverse(): each target is pinned with a
 * refcount taken under the owning bus's eb_mtx, the callback runs with the
 * mutex dropped, and the next target is pinned before the current one is
 * released via xpt_release_target().  Returns the last tr_func() value
 * (1 if the target list was empty).
 */
static int
xpttargettraverse(struct cam_eb *bus, struct cam_et *start_target,
		  xpt_targetfunc_t *tr_func, void *arg)
{
	struct cam_et *target, *next_target;
	int retval;

	retval = 1;
	if (start_target)
		target = start_target;
	else {
		/* Pin the first target under the bus mutex. */
		mtx_lock(&bus->eb_mtx);
		target = TAILQ_FIRST(&bus->et_entries);
		if (target == NULL) {
			mtx_unlock(&bus->eb_mtx);
			return (retval);
		}
		target->refcount++;
		mtx_unlock(&bus->eb_mtx);
	}
	for (; target != NULL; target = next_target) {
		/* Callback runs with eb_mtx dropped. */
		retval = tr_func(target, arg);
		if (retval == 0) {
			xpt_release_target(target);
			break;
		}
		/* Pin the successor before releasing the current target. */
		mtx_lock(&bus->eb_mtx);
		next_target = TAILQ_NEXT(target, links);
		if (next_target)
			next_target->refcount++;
		mtx_unlock(&bus->eb_mtx);
		xpt_release_target(target);
	}
	return(retval);
}
      
/*
 * Invoke tr_func() on start_device (or, when start_device is NULL, on every
 * device under 'target') until tr_func() returns 0 or the list is
 * exhausted.  The device list is protected by the owning bus's eb_mtx;
 * devices are pinned by refcount across the callback, which — unlike the
 * bus/target traversals — runs with the per-device device_mtx held.
 * Returns the last tr_func() value (1 if the device list was empty).
 */
static int
xptdevicetraverse(struct cam_et *target, struct cam_ed *start_device,
		  xpt_devicefunc_t *tr_func, void *arg)
{
	struct cam_eb *bus;
	struct cam_ed *device, *next_device;
	int retval;

	retval = 1;
	bus = target->bus;
	if (start_device)
		device = start_device;
	else {
		/* Pin the first device under the bus mutex. */
		mtx_lock(&bus->eb_mtx);
		device = TAILQ_FIRST(&target->ed_entries);
		if (device == NULL) {
			mtx_unlock(&bus->eb_mtx);
			return (retval);
		}
		device->refcount++;
		mtx_unlock(&bus->eb_mtx);
	}
	for (; device != NULL; device = next_device) {
		/* The callback itself runs under the device's own mutex. */
		mtx_lock(&device->device_mtx);
		retval = tr_func(device, arg);
		mtx_unlock(&device->device_mtx);
		if (retval == 0) {
			xpt_release_device(device);
			break;
		}
		/* Pin the successor before releasing the current device. */
		mtx_lock(&bus->eb_mtx);
		next_device = TAILQ_NEXT(device, links);
		if (next_device)
			next_device->refcount++;
		mtx_unlock(&bus->eb_mtx);
		xpt_release_device(device);
	}
	return(retval);
}
      
/*
 * Invoke tr_func() on start_periph (or, when start_periph is NULL, on every
 * live peripheral attached to 'device') until tr_func() returns 0 or the
 * list is exhausted.  Peripherals already marked CAM_PERIPH_FREE are
 * skipped — they are on their way out and must not be handed to the
 * callback.  List walking takes both the bus-list lock and the bus's
 * eb_mtx (in that order); each visited periph is pinned by refcount and
 * dropped via cam_periph_release_locked() once its successor is pinned.
 * Returns the last tr_func() value (1 if no live periph was found).
 */
static int
xptperiphtraverse(struct cam_ed *device, struct cam_periph *start_periph,
		  xpt_periphfunc_t *tr_func, void *arg)
{
	struct cam_eb *bus;
	struct cam_periph *periph, *next_periph;
	int retval;

	retval = 1;

	bus = device->target->bus;
	if (start_periph)
		periph = start_periph;
	else {
		/*
		 * Find the first periph that isn't being freed, pinning it
		 * under both the bus-list lock and the bus mutex.
		 */
		xpt_lock_buses();
		mtx_lock(&bus->eb_mtx);
		periph = SLIST_FIRST(&device->periphs);
		while (periph != NULL && (periph->flags & CAM_PERIPH_FREE) != 0)
			periph = SLIST_NEXT(periph, periph_links);
		if (periph == NULL) {
			mtx_unlock(&bus->eb_mtx);
			xpt_unlock_buses();
			return (retval);
		}
		periph->refcount++;
		mtx_unlock(&bus->eb_mtx);
		xpt_unlock_buses();
	}
	for (; periph != NULL; periph = next_periph) {
		/* Callback runs with neither traversal lock held. */
		retval = tr_func(periph, arg);
		if (retval == 0) {
			cam_periph_release_locked(periph);
			break;
		}
		/*
		 * Pin the next live (non-FREE) periph before releasing the
		 * current one.
		 */
		xpt_lock_buses();
		mtx_lock(&bus->eb_mtx);
		next_periph = SLIST_NEXT(periph, periph_links);
		while (next_periph != NULL &&
		    (next_periph->flags & CAM_PERIPH_FREE) != 0)
			next_periph = SLIST_NEXT(next_periph, periph_links);
		if (next_periph)
			next_periph->refcount++;
		mtx_unlock(&bus->eb_mtx);
		xpt_unlock_buses();
		cam_periph_release_locked(periph);
	}
	return(retval);
}
      
      static int
      xptpdrvtraverse(struct periph_driver **start_pdrv,
                      xpt_pdrvfunc_t *tr_func, void *arg)
      {
              struct periph_driver **pdrv;
              int retval;
      
              retval = 1;
      
              /*
               * We don't traverse the peripheral driver list like we do the
               * other lists, because it is a linker set, and therefore cannot be
               * changed during runtime.  If the peripheral driver list is ever
               * re-done to be something other than a linker set (i.e. it can
               * change while the system is running), the list traversal should
               * be modified to work like the other traversal functions.
               */
              for (pdrv = (start_pdrv ? start_pdrv : periph_drivers);
                   *pdrv != NULL; pdrv++) {
                      retval = tr_func(pdrv, arg);
      
                      if (retval == 0)
                              return(retval);
              }
      
              return(retval);
      }
      
/*
 * Invoke tr_func() on start_periph (or, when start_periph is NULL, on every
 * live unit registered under the peripheral driver *pdrv) until tr_func()
 * returns 0 or the unit list is exhausted.  Units flagged CAM_PERIPH_FREE
 * are skipped.  The unit list is walked under the bus-list lock; each
 * periph is pinned by refcount, the callback runs with the periph's own
 * lock held (cam_periph_lock/unlock), and the pin is dropped with
 * cam_periph_release() once the successor is pinned.  Returns the last
 * tr_func() value (1 if no live unit was found).
 */
static int
xptpdperiphtraverse(struct periph_driver **pdrv,
		    struct cam_periph *start_periph,
		    xpt_periphfunc_t *tr_func, void *arg)
{
	struct cam_periph *periph, *next_periph;
	int retval;

	retval = 1;

	if (start_periph)
		periph = start_periph;
	else {
		/* Find and pin the first non-FREE unit. */
		xpt_lock_buses();
		periph = TAILQ_FIRST(&(*pdrv)->units);
		while (periph != NULL && (periph->flags & CAM_PERIPH_FREE) != 0)
			periph = TAILQ_NEXT(periph, unit_links);
		if (periph == NULL) {
			xpt_unlock_buses();
			return (retval);
		}
		periph->refcount++;
		xpt_unlock_buses();
	}
	for (; periph != NULL; periph = next_periph) {
		/* The callback runs under the periph's lock. */
		cam_periph_lock(periph);
		retval = tr_func(periph, arg);
		cam_periph_unlock(periph);
		if (retval == 0) {
			cam_periph_release(periph);
			break;
		}
		/* Pin the next live unit before releasing the current one. */
		xpt_lock_buses();
		next_periph = TAILQ_NEXT(periph, unit_links);
		while (next_periph != NULL &&
		    (next_periph->flags & CAM_PERIPH_FREE) != 0)
			next_periph = TAILQ_NEXT(next_periph, unit_links);
		if (next_periph)
			next_periph->refcount++;
		xpt_unlock_buses();
		cam_periph_release(periph);
	}
	return(retval);
}
      
      static int
      xptdefbusfunc(struct cam_eb *bus, void *arg)
      {
              struct xpt_traverse_config *tr_config;
      
              tr_config = (struct xpt_traverse_config *)arg;
      
              if (tr_config->depth == XPT_DEPTH_BUS) {
                      xpt_busfunc_t *tr_func;
      
                      tr_func = (xpt_busfunc_t *)tr_config->tr_func;
      
                      return(tr_func(bus, tr_config->tr_arg));
              } else
                      return(xpttargettraverse(bus, NULL, xptdeftargetfunc, arg));
      }
      
      static int
      xptdeftargetfunc(struct cam_et *target, void *arg)
      {
              struct xpt_traverse_config *tr_config;
      
              tr_config = (struct xpt_traverse_config *)arg;
      
              if (tr_config->depth == XPT_DEPTH_TARGET) {
                      xpt_targetfunc_t *tr_func;
      
                      tr_func = (xpt_targetfunc_t *)tr_config->tr_func;
      
                      return(tr_func(target, tr_config->tr_arg));
              } else
                      return(xptdevicetraverse(target, NULL, xptdefdevicefunc, arg));
      }
      
      static int
      xptdefdevicefunc(struct cam_ed *device, void *arg)
      {
              struct xpt_traverse_config *tr_config;
      
              tr_config = (struct xpt_traverse_config *)arg;
      
              if (tr_config->depth == XPT_DEPTH_DEVICE) {
                      xpt_devicefunc_t *tr_func;
      
                      tr_func = (xpt_devicefunc_t *)tr_config->tr_func;
      
                      return(tr_func(device, tr_config->tr_arg));
              } else
                      return(xptperiphtraverse(device, NULL, xptdefperiphfunc, arg));
      }
      
      static int
      xptdefperiphfunc(struct cam_periph *periph, void *arg)
      {
              struct xpt_traverse_config *tr_config;
              xpt_periphfunc_t *tr_func;
      
              tr_config = (struct xpt_traverse_config *)arg;
      
              tr_func = (xpt_periphfunc_t *)tr_config->tr_func;
      
              /*
               * Unlike the other default functions, we don't check for depth
               * here.  The peripheral driver level is the last level in the EDT,
               * so if we're here, we should execute the function in question.
               */
              return(tr_func(periph, tr_config->tr_arg));
      }
      
      /*
       * Execute the given function for every bus in the EDT.
       */
      static int
      xpt_for_all_busses(xpt_busfunc_t *tr_func, void *arg)
      {
              struct xpt_traverse_config tr_config;
      
              tr_config.depth = XPT_DEPTH_BUS;
              tr_config.tr_func = tr_func;
              tr_config.tr_arg = arg;
      
              return(xptbustraverse(NULL, xptdefbusfunc, &tr_config));
      }
      
      /*
       * Execute the given function for every device in the EDT.
       */
      static int
      xpt_for_all_devices(xpt_devicefunc_t *tr_func, void *arg)
      {
              struct xpt_traverse_config tr_config;
      
              tr_config.depth = XPT_DEPTH_DEVICE;
              tr_config.tr_func = tr_func;
              tr_config.tr_arg = arg;
      
              return(xptbustraverse(NULL, xptdefbusfunc, &tr_config));
      }
      
/*
 * Per-device helper for XPT_SASYNC_CB registration: replay an
 * AC_FOUND_DEVICE event to the newly registered callback for each device
 * already known to the EDT, so the subscriber learns about existing
 * devices, not just future arrivals.  Builds a temporary on-stack path to
 * the device, fetches its ccb_getdev data via XPT_GDEV_TYPE, and invokes
 * the callback.  Always returns 1 so traversal continues.
 */
static int
xptsetasyncfunc(struct cam_ed *device, void *arg)
{
	struct cam_path path;
	struct ccb_getdev cgd;
	struct ccb_setasync *csa = (struct ccb_setasync *)arg;

	/*
	 * Don't report unconfigured devices (Wildcard devs,
	 * devices only for target mode, device instances
	 * that have been invalidated but are waiting for
	 * their last reference count to be released).
	 */
	if ((device->flags & CAM_DEV_UNCONFIGURED) != 0)
		return (1);

	xpt_compile_path(&path,
			 NULL,
			 device->target->bus->path_id,
			 device->target->target_id,
			 device->lun_id);
	xpt_setup_ccb(&cgd.ccb_h, &path, CAM_PRIORITY_NORMAL);
	cgd.ccb_h.func_code = XPT_GDEV_TYPE;
	xpt_action((union ccb *)&cgd);
	csa->callback(csa->callback_arg,
			    AC_FOUND_DEVICE,
			    &path, &cgd);
	xpt_release_path(&path);

	return(1);
}
      
/*
 * Per-bus helper for XPT_SASYNC_CB registration: replay an
 * AC_PATH_REGISTERED event to the newly registered callback for each bus
 * already present, passing the bus's path-inquiry data.  A temporary
 * wildcard (bus-level) path is built on the stack, and the path lock is
 * held around the inquiry and the callback.  Always returns 1 so
 * traversal continues.
 */
static int
xptsetasyncbusfunc(struct cam_eb *bus, void *arg)
{
	struct cam_path path;
	struct ccb_pathinq cpi;
	struct ccb_setasync *csa = (struct ccb_setasync *)arg;

	xpt_compile_path(&path, /*periph*/NULL,
			 bus->path_id,
			 CAM_TARGET_WILDCARD,
			 CAM_LUN_WILDCARD);
	xpt_path_lock(&path);
	xpt_path_inq(&cpi, &path);
	csa->callback(csa->callback_arg,
			    AC_PATH_REGISTERED,
			    &path, &cpi);
	xpt_path_unlock(&path);
	xpt_release_path(&path);

	return(1);
}
      
/*
 * Entry point for submitting a CCB to the transport layer.  Marks the CCB
 * in progress and dispatches it through the action method of the transport
 * bound to the CCB's bus (which may be xpt_action_default() or a
 * transport-specific override — determined by the xport ops table, not
 * visible here).
 */
void
xpt_action(union ccb *start_ccb)
{

	CAM_DEBUG(start_ccb->ccb_h.path, CAM_DEBUG_TRACE,
	    ("xpt_action: func %#x %s\n", start_ccb->ccb_h.func_code,
		xpt_action_name(start_ccb->ccb_h.func_code)));

	/* Mark in progress before handing off to the transport. */
	start_ccb->ccb_h.status = CAM_REQ_INPROG;
	(*(start_ccb->ccb_h.path->bus->xport->ops->action))(start_ccb);
}
      
      void
      xpt_action_default(union ccb *start_ccb)
 3162 {
              struct cam_path *path;
              struct cam_sim *sim;
              struct mtx *mtx;
      
              path = start_ccb->ccb_h.path;
              CAM_DEBUG(path, CAM_DEBUG_TRACE,
                  ("xpt_action_default: func %#x %s\n", start_ccb->ccb_h.func_code,
                      xpt_action_name(start_ccb->ccb_h.func_code)));
      
              switch (start_ccb->ccb_h.func_code) {
              case XPT_SCSI_IO:
              {
                      struct cam_ed *device;
      
                      /*
                       * For the sake of compatibility with SCSI-1
                       * devices that may not understand the identify
                       * message, we include lun information in the
                       * second byte of all commands.  SCSI-1 specifies
                       * that luns are a 3 bit value and reserves only 3
                       * bits for lun information in the CDB.  Later
                       * revisions of the SCSI spec allow for more than 8
                       * luns, but have deprecated lun information in the
                       * CDB.  So, if the lun won't fit, we must omit.
                       *
                       * Also be aware that during initial probing for devices,
                       * the inquiry information is unknown but initialized to 0.
                       * This means that this code will be exercised while probing
                       * devices with an ANSI revision greater than 2.
                       */
                      device = path->device;
                      if (device->protocol_version <= SCSI_REV_2
 3162                  && start_ccb->ccb_h.target_lun < 8
                       && (start_ccb->ccb_h.flags & CAM_CDB_POINTER) == 0) {
                              start_ccb->csio.cdb_io.cdb_bytes[1] |=
                                  start_ccb->ccb_h.target_lun << 5;
                      }
                      start_ccb->csio.scsi_status = SCSI_STATUS_OK;
              }
              /* FALLTHROUGH */
              case XPT_TARGET_IO:
              case XPT_CONT_TARGET_IO:
                      start_ccb->csio.sense_resid = 0;
                      start_ccb->csio.resid = 0;
                      /* FALLTHROUGH */
              case XPT_ATA_IO:
                      if (start_ccb->ccb_h.func_code == XPT_ATA_IO)
                              start_ccb->ataio.resid = 0;
                      /* FALLTHROUGH */
              case XPT_NVME_IO:
              case XPT_NVME_ADMIN:
              case XPT_MMC_IO:
              case XPT_RESET_DEV:
              case XPT_ENG_EXEC:
              case XPT_SMP_IO:
              {
                      struct cam_devq *devq;
      
                      devq = path->bus->sim->devq;
                      mtx_lock(&devq->send_mtx);
                      cam_ccbq_insert_ccb(&path->device->ccbq, start_ccb);
 3162                 if (xpt_schedule_devq(devq, path->device) != 0)
                              xpt_run_devq(devq);
                      mtx_unlock(&devq->send_mtx);
                      break;
              }
              case XPT_CALC_GEOMETRY:
                      /* Filter out garbage */
                      if (start_ccb->ccg.block_size == 0
                       || start_ccb->ccg.volume_size == 0) {
                              start_ccb->ccg.cylinders = 0;
                              start_ccb->ccg.heads = 0;
                              start_ccb->ccg.secs_per_track = 0;
                              start_ccb->ccb_h.status = CAM_REQ_CMP;
                              break;
                      }
                      goto call_sim;
              case XPT_ABORT:
              {
                      union ccb* abort_ccb;
      
                      abort_ccb = start_ccb->cab.abort_ccb;
                      if (XPT_FC_IS_DEV_QUEUED(abort_ccb)) {
                              struct cam_ed *device;
                              struct cam_devq *devq;
      
                              device = abort_ccb->ccb_h.path->device;
                              devq = device->sim->devq;
      
                              mtx_lock(&devq->send_mtx);
                              if (abort_ccb->ccb_h.pinfo.index > 0) {
                                      cam_ccbq_remove_ccb(&device->ccbq, abort_ccb);
                                      abort_ccb->ccb_h.status =
                                          CAM_REQ_ABORTED|CAM_DEV_QFRZN;
                                      xpt_freeze_devq_device(device, 1);
                                      mtx_unlock(&devq->send_mtx);
                                      xpt_done(abort_ccb);
                                      start_ccb->ccb_h.status = CAM_REQ_CMP;
                                      break;
                              }
                              mtx_unlock(&devq->send_mtx);
      
                              if (abort_ccb->ccb_h.pinfo.index == CAM_UNQUEUED_INDEX
                               && (abort_ccb->ccb_h.status & CAM_SIM_QUEUED) == 0) {
                                      /*
                                       * We've caught this ccb en route to
                                       * the SIM.  Flag it for abort and the
                                       * SIM will do so just before starting
                                       * real work on the CCB.
                                       */
                                      abort_ccb->ccb_h.status =
                                          CAM_REQ_ABORTED|CAM_DEV_QFRZN;
                                      xpt_freeze_devq(abort_ccb->ccb_h.path, 1);
                                      start_ccb->ccb_h.status = CAM_REQ_CMP;
                                      break;
                              }
                      }
                      if (XPT_FC_IS_QUEUED(abort_ccb)
                       && (abort_ccb->ccb_h.pinfo.index == CAM_DONEQ_INDEX)) {
                              /*
                               * It's already completed but waiting
                               * for our SWI to get to it.
                               */
                              start_ccb->ccb_h.status = CAM_UA_ABORT;
                              break;
                      }
                      /*
                       * If we weren't able to take care of the abort request
                       * in the XPT, pass the request down to the SIM for processing.
                       */
              }
              /* FALLTHROUGH */
              case XPT_ACCEPT_TARGET_IO:
              case XPT_EN_LUN:
              case XPT_IMMED_NOTIFY:
              case XPT_NOTIFY_ACK:
              case XPT_RESET_BUS:
              case XPT_IMMEDIATE_NOTIFY:
              case XPT_NOTIFY_ACKNOWLEDGE:
              case XPT_GET_SIM_KNOB_OLD:
              case XPT_GET_SIM_KNOB:
              case XPT_SET_SIM_KNOB:
              case XPT_GET_TRAN_SETTINGS:
              case XPT_SET_TRAN_SETTINGS:
              case XPT_PATH_INQ:
      call_sim:
                      sim = path->bus->sim;
                      mtx = sim->mtx;
                      if (mtx && !mtx_owned(mtx))
                              mtx_lock(mtx);
                      else
                              mtx = NULL;
      
                      CAM_DEBUG(path, CAM_DEBUG_TRACE,
                          ("Calling sim->sim_action(): func=%#x\n", start_ccb->ccb_h.func_code));
                      (*(sim->sim_action))(sim, start_ccb);
                      CAM_DEBUG(path, CAM_DEBUG_TRACE,
                          ("sim->sim_action returned: status=%#x\n", start_ccb->ccb_h.status));
                      if (mtx)
                              mtx_unlock(mtx);
                      break;
              case XPT_PATH_STATS:
                      start_ccb->cpis.last_reset = path->bus->last_reset;
                      start_ccb->ccb_h.status = CAM_REQ_CMP;
                      break;
              case XPT_GDEV_TYPE:
              {
                      struct cam_ed *dev;
      
                      dev = path->device;
                      if ((dev->flags & CAM_DEV_UNCONFIGURED) != 0) {
                              start_ccb->ccb_h.status = CAM_DEV_NOT_THERE;
                      } else {
                              struct ccb_getdev *cgd;
      
                              cgd = &start_ccb->cgd;
                              cgd->protocol = dev->protocol;
                              cgd->inq_data = dev->inq_data;
                              cgd->ident_data = dev->ident_data;
                              cgd->inq_flags = dev->inq_flags;
                              cgd->ccb_h.status = CAM_REQ_CMP;
                              cgd->serial_num_len = dev->serial_num_len;
                              if ((dev->serial_num_len > 0)
                               && (dev->serial_num != NULL))
                                      bcopy(dev->serial_num, cgd->serial_num,
                                            dev->serial_num_len);
                      }
                      break;
              }
              case XPT_GDEV_STATS:
              {
                      struct ccb_getdevstats *cgds = &start_ccb->cgds;
                      struct cam_ed *dev = path->device;
                      struct cam_eb *bus = path->bus;
                      struct cam_et *tar = path->target;
                      struct cam_devq *devq = bus->sim->devq;
      
                      mtx_lock(&devq->send_mtx);
                      cgds->dev_openings = dev->ccbq.dev_openings;
                      cgds->dev_active = dev->ccbq.dev_active;
                      cgds->allocated = dev->ccbq.allocated;
                      cgds->queued = cam_ccbq_pending_ccb_count(&dev->ccbq);
                      cgds->held = cgds->allocated - cgds->dev_active - cgds->queued;
                      cgds->last_reset = tar->last_reset;
                      cgds->maxtags = dev->maxtags;
                      cgds->mintags = dev->mintags;
                      if (timevalcmp(&tar->last_reset, &bus->last_reset, <))
                              cgds->last_reset = bus->last_reset;
                      mtx_unlock(&devq->send_mtx);
                      cgds->ccb_h.status = CAM_REQ_CMP;
                      break;
              }
              case XPT_GDEVLIST:
              {
                      struct cam_periph        *nperiph;
                      struct periph_list        *periph_head;
                      struct ccb_getdevlist        *cgdl;
                      u_int                        i;
                      struct cam_ed                *device;
                      int                        found;
      
                      found = 0;
      
                      /*
                       * Don't want anyone mucking with our data.
                       */
                      device = path->device;
                      periph_head = &device->periphs;
                      cgdl = &start_ccb->cgdl;
      
                      /*
                       * Check and see if the list has changed since the user
                       * last requested a list member.  If so, tell them that the
                       * list has changed, and therefore they need to start over
                       * from the beginning.
                       */
                      if ((cgdl->index != 0) &&
                          (cgdl->generation != device->generation)) {
                              cgdl->status = CAM_GDEVLIST_LIST_CHANGED;
                              break;
                      }
      
                      /*
                       * Traverse the list of peripherals and attempt to find
                       * the requested peripheral.
                       */
                      for (nperiph = SLIST_FIRST(periph_head), i = 0;
                           (nperiph != NULL) && (i <= cgdl->index);
                           nperiph = SLIST_NEXT(nperiph, periph_links), i++) {
                              if (i == cgdl->index) {
                                      strlcpy(cgdl->periph_name,
                                              nperiph->periph_name,
                                              sizeof(cgdl->periph_name));
                                      cgdl->unit_number = nperiph->unit_number;
                                      found = 1;
                              }
                      }
                      if (found == 0) {
                              cgdl->status = CAM_GDEVLIST_ERROR;
                              break;
                      }
      
                      if (nperiph == NULL)
                              cgdl->status = CAM_GDEVLIST_LAST_DEVICE;
                      else
                              cgdl->status = CAM_GDEVLIST_MORE_DEVS;
      
                      cgdl->index++;
                      cgdl->generation = device->generation;
      
                      cgdl->ccb_h.status = CAM_REQ_CMP;
                      break;
              }
              case XPT_DEV_MATCH:
              {
                      dev_pos_type position_type;
                      struct ccb_dev_match *cdm;
      
                      cdm = &start_ccb->cdm;
      
                      /*
                       * There are two ways of getting at information in the EDT.
                       * The first way is via the primary EDT tree.  It starts
                       * with a list of buses, then a list of targets on a bus,
                       * then devices/luns on a target, and then peripherals on a
                       * device/lun.  The "other" way is by the peripheral driver
                       * lists.  The peripheral driver lists are organized by
                       * peripheral driver.  (obviously)  So it makes sense to
                       * use the peripheral driver list if the user is looking
                       * for something like "da1", or all "da" devices.  If the
                       * user is looking for something on a particular bus/target
                       * or lun, it's generally better to go through the EDT tree.
                       */
      
                      if (cdm->pos.position_type != CAM_DEV_POS_NONE)
                              position_type = cdm->pos.position_type;
                      else {
                              u_int i;
      
                              position_type = CAM_DEV_POS_NONE;
      
                              for (i = 0; i < cdm->num_patterns; i++) {
                                      if ((cdm->patterns[i].type == DEV_MATCH_BUS)
                                       ||(cdm->patterns[i].type == DEV_MATCH_DEVICE)){
                                              position_type = CAM_DEV_POS_EDT;
                                              break;
                                      }
                              }
      
                              if (cdm->num_patterns == 0)
                                      position_type = CAM_DEV_POS_EDT;
                              else if (position_type == CAM_DEV_POS_NONE)
                                      position_type = CAM_DEV_POS_PDRV;
                      }
      
                      switch(position_type & CAM_DEV_POS_TYPEMASK) {
                      case CAM_DEV_POS_EDT:
                              xptedtmatch(cdm);
                              break;
                      case CAM_DEV_POS_PDRV:
                              xptperiphlistmatch(cdm);
                              break;
                      default:
                              cdm->status = CAM_DEV_MATCH_ERROR;
                              break;
                      }
      
                      if (cdm->status == CAM_DEV_MATCH_ERROR)
                              start_ccb->ccb_h.status = CAM_REQ_CMP_ERR;
                      else
                              start_ccb->ccb_h.status = CAM_REQ_CMP;
      
                      break;
              }
              case XPT_SASYNC_CB:
              {
                      struct ccb_setasync *csa;
                      struct async_node *cur_entry;
                      struct async_list *async_head;
                      u_int32_t added;
      
                      csa = &start_ccb->csa;
                      added = csa->event_enable;
                      async_head = &path->device->asyncs;
      
                      /*
                       * If there is already an entry for us, simply
                       * update it.
                       */
                      cur_entry = SLIST_FIRST(async_head);
                      while (cur_entry != NULL) {
                              if ((cur_entry->callback_arg == csa->callback_arg)
                               && (cur_entry->callback == csa->callback))
                                      break;
                              cur_entry = SLIST_NEXT(cur_entry, links);
                      }
      
                      if (cur_entry != NULL) {
                               /*
                               * If the request has no flags set,
                               * remove the entry.
                               */
                              added &= ~cur_entry->event_enable;
                              if (csa->event_enable == 0) {
                                      SLIST_REMOVE(async_head, cur_entry,
                                                   async_node, links);
                                      xpt_release_device(path->device);
                                      free(cur_entry, M_CAMXPT);
                              } else {
                                      cur_entry->event_enable = csa->event_enable;
                              }
                              csa->event_enable = added;
                      } else {
                              cur_entry = malloc(sizeof(*cur_entry), M_CAMXPT,
                                                 M_NOWAIT);
                              if (cur_entry == NULL) {
                                      csa->ccb_h.status = CAM_RESRC_UNAVAIL;
                                      break;
                              }
                              cur_entry->event_enable = csa->event_enable;
                              cur_entry->event_lock = (path->bus->sim->mtx &&
                                  mtx_owned(path->bus->sim->mtx)) ? 1 : 0;
                              cur_entry->callback_arg = csa->callback_arg;
                              cur_entry->callback = csa->callback;
                              SLIST_INSERT_HEAD(async_head, cur_entry, links);
                              xpt_acquire_device(path->device);
                      }
                      start_ccb->ccb_h.status = CAM_REQ_CMP;
                      break;
              }
              case XPT_REL_SIMQ:
              {
                      struct ccb_relsim *crs;
                      struct cam_ed *dev;
      
                      crs = &start_ccb->crs;
                      dev = path->device;
                      if (dev == NULL) {
                              crs->ccb_h.status = CAM_DEV_NOT_THERE;
                              break;
                      }
      
                      if ((crs->release_flags & RELSIM_ADJUST_OPENINGS) != 0) {
                              /* Don't ever go below one opening */
                              if (crs->openings > 0) {
                                      xpt_dev_ccbq_resize(path, crs->openings);
                                      if (bootverbose) {
                                              xpt_print(path,
                                                  "number of openings is now %d\n",
                                                  crs->openings);
                                      }
                              }
                      }
      
                      mtx_lock(&dev->sim->devq->send_mtx);
                      if ((crs->release_flags & RELSIM_RELEASE_AFTER_TIMEOUT) != 0) {
                              if ((dev->flags & CAM_DEV_REL_TIMEOUT_PENDING) != 0) {
                                      /*
                                       * Just extend the old timeout and decrement
                                       * the freeze count so that a single timeout
                                       * is sufficient for releasing the queue.
                                       */
                                      start_ccb->ccb_h.flags &= ~CAM_DEV_QFREEZE;
                                      callout_stop(&dev->callout);
                              } else {
                                      start_ccb->ccb_h.flags |= CAM_DEV_QFREEZE;
                              }
      
                              callout_reset_sbt(&dev->callout,
                                  SBT_1MS * crs->release_timeout, 0,
                                  xpt_release_devq_timeout, dev, 0);
      
                              dev->flags |= CAM_DEV_REL_TIMEOUT_PENDING;
                      }
      
                      if ((crs->release_flags & RELSIM_RELEASE_AFTER_CMDCMPLT) != 0) {
                              if ((dev->flags & CAM_DEV_REL_ON_COMPLETE) != 0) {
                                      /*
                                       * Decrement the freeze count so that a single
                                       * completion is still sufficient to unfreeze
                                       * the queue.
                                       */
                                      start_ccb->ccb_h.flags &= ~CAM_DEV_QFREEZE;
                              } else {
                                      dev->flags |= CAM_DEV_REL_ON_COMPLETE;
                                      start_ccb->ccb_h.flags |= CAM_DEV_QFREEZE;
                              }
                      }
      
                      if ((crs->release_flags & RELSIM_RELEASE_AFTER_QEMPTY) != 0) {
                              if ((dev->flags & CAM_DEV_REL_ON_QUEUE_EMPTY) != 0
                               || (dev->ccbq.dev_active == 0)) {
                                      start_ccb->ccb_h.flags &= ~CAM_DEV_QFREEZE;
                              } else {
                                      dev->flags |= CAM_DEV_REL_ON_QUEUE_EMPTY;
                                      start_ccb->ccb_h.flags |= CAM_DEV_QFREEZE;
                              }
                      }
                      mtx_unlock(&dev->sim->devq->send_mtx);
      
                      if ((start_ccb->ccb_h.flags & CAM_DEV_QFREEZE) == 0)
                              xpt_release_devq(path, /*count*/1, /*run_queue*/TRUE);
                      start_ccb->crs.qfrozen_cnt = dev->ccbq.queue.qfrozen_cnt;
                      start_ccb->ccb_h.status = CAM_REQ_CMP;
                      break;
              }
              case XPT_DEBUG: {
                      struct cam_path *oldpath;
      
                      /* Check that all request bits are supported. */
                      if (start_ccb->cdbg.flags & ~(CAM_DEBUG_COMPILE)) {
                              start_ccb->ccb_h.status = CAM_FUNC_NOTAVAIL;
                              break;
                      }
      
                      cam_dflags = CAM_DEBUG_NONE;
                      if (cam_dpath != NULL) {
                              oldpath = cam_dpath;
                              cam_dpath = NULL;
                              xpt_free_path(oldpath);
                      }
                      if (start_ccb->cdbg.flags != CAM_DEBUG_NONE) {
                              if (xpt_create_path(&cam_dpath, NULL,
                                                  start_ccb->ccb_h.path_id,
                                                  start_ccb->ccb_h.target_id,
                                                  start_ccb->ccb_h.target_lun) !=
                                                  CAM_REQ_CMP) {
                                      start_ccb->ccb_h.status = CAM_RESRC_UNAVAIL;
                              } else {
                                      cam_dflags = start_ccb->cdbg.flags;
                                      start_ccb->ccb_h.status = CAM_REQ_CMP;
                                      xpt_print(cam_dpath, "debugging flags now %x\n",
                                          cam_dflags);
                              }
                      } else
                              start_ccb->ccb_h.status = CAM_REQ_CMP;
                      break;
              }
              case XPT_NOOP:
                      if ((start_ccb->ccb_h.flags & CAM_DEV_QFREEZE) != 0)
                              xpt_freeze_devq(path, 1);
                      start_ccb->ccb_h.status = CAM_REQ_CMP;
                      break;
              case XPT_REPROBE_LUN:
                      xpt_async(AC_INQ_CHANGED, path, NULL);
                      start_ccb->ccb_h.status = CAM_REQ_CMP;
                      xpt_done(start_ccb);
                      break;
              case XPT_ASYNC:
                      start_ccb->ccb_h.status = CAM_REQ_CMP;
                      xpt_done(start_ccb);
                      break;
              default:
              case XPT_SDEV_TYPE:
              case XPT_TERM_IO:
              case XPT_ENG_INQ:
                      /* XXX Implement */
                      xpt_print(start_ccb->ccb_h.path,
                          "%s: CCB type %#x %s not supported\n", __func__,
                          start_ccb->ccb_h.func_code,
                          xpt_action_name(start_ccb->ccb_h.func_code));
                      start_ccb->ccb_h.status = CAM_PROVIDE_FAIL;
                      if (start_ccb->ccb_h.func_code & XPT_FC_DEV_QUEUED) {
                              xpt_done(start_ccb);
                      }
                      break;
              }
              CAM_DEBUG(path, CAM_DEBUG_TRACE,
                  ("xpt_action_default: func= %#x %s status %#x\n",
                      start_ccb->ccb_h.func_code,
                       xpt_action_name(start_ccb->ccb_h.func_code),
                      start_ccb->ccb_h.status));
      }
      
      /*
       * Call the sim poll routine to allow the sim to complete
       * any inflight requests, then call camisr_runqueue to
       * complete any CCB that the polling completed.
       */
      void
      xpt_sim_poll(struct cam_sim *sim)
      {
              struct mtx *mtx;
      
              mtx = sim->mtx;
              if (mtx)
                      mtx_lock(mtx);
              (*(sim->sim_poll))(sim);
              if (mtx)
                      mtx_unlock(mtx);
              camisr_runqueue();
      }
      
      uint32_t
      xpt_poll_setup(union ccb *start_ccb)
      {
              u_int32_t timeout;
              struct          cam_sim *sim;
              struct          cam_devq *devq;
              struct          cam_ed *dev;
      
              timeout = start_ccb->ccb_h.timeout * 10;
              sim = start_ccb->ccb_h.path->bus->sim;
              devq = sim->devq;
              dev = start_ccb->ccb_h.path->device;
      
              /*
               * Steal an opening so that no other queued requests
               * can get it before us while we simulate interrupts.
               */
              mtx_lock(&devq->send_mtx);
              dev->ccbq.dev_openings--;
              while((devq->send_openings <= 0 || dev->ccbq.dev_openings < 0) &&
                  (--timeout > 0)) {
                      mtx_unlock(&devq->send_mtx);
                      DELAY(100);
                      xpt_sim_poll(sim);
                      mtx_lock(&devq->send_mtx);
              }
              dev->ccbq.dev_openings++;
              mtx_unlock(&devq->send_mtx);
      
              return (timeout);
      }
      
      void
      xpt_pollwait(union ccb *start_ccb, uint32_t timeout)
      {
      
              while (--timeout > 0) {
                      xpt_sim_poll(start_ccb->ccb_h.path->bus->sim);
                      if ((start_ccb->ccb_h.status & CAM_STATUS_MASK)
                          != CAM_REQ_INPROG)
                              break;
                      DELAY(100);
              }
      
              if (timeout == 0) {
                      /*
                       * XXX Is it worth adding a sim_timeout entry
                       * point so we can attempt recovery?  If
                       * this is only used for dumps, I don't think
                       * it is.
                       */
                      start_ccb->ccb_h.status = CAM_CMD_TIMEOUT;
              }
      }
      
      void
      xpt_polled_action(union ccb *start_ccb)
      {
              uint32_t        timeout;
              struct cam_ed        *dev;
      
              timeout = start_ccb->ccb_h.timeout * 10;
              dev = start_ccb->ccb_h.path->device;
      
              mtx_unlock(&dev->device_mtx);
      
              timeout = xpt_poll_setup(start_ccb);
              if (timeout > 0) {
                      xpt_action(start_ccb);
                      xpt_pollwait(start_ccb, timeout);
              } else {
                      start_ccb->ccb_h.status = CAM_RESRC_UNAVAIL;
              }
      
              mtx_lock(&dev->device_mtx);
      }
      
      /*
       * Schedule a peripheral driver to receive a ccb when its
       * target device has space for more transactions.
       */
      void
      xpt_schedule(struct cam_periph *periph, u_int32_t new_priority)
 3162 {
      
              CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("xpt_schedule\n"));
              cam_periph_assert(periph, MA_OWNED);
  728         if (new_priority < periph->scheduled_priority) {
 3162                 periph->scheduled_priority = new_priority;
                      xpt_run_allocq(periph, 0);
              }
      }
      
      /*
       * Schedule a device to run on a given queue.
       * If the device was inserted as a new entry on the queue,
       * return 1 meaning the device queue should be run. If we
       * were already queued, implying someone else has already
       * started the queue, return 0 so the caller doesn't attempt
       * to run the queue.
       */
      static int
      xpt_schedule_dev(struct camq *queue, cam_pinfo *pinfo,
                       u_int32_t new_priority)
      {
              int retval;
              u_int32_t old_priority;
      
              CAM_DEBUG_PRINT(CAM_DEBUG_XPT, ("xpt_schedule_dev\n"));
      
              old_priority = pinfo->priority;
      
              /*
               * Are we already queued?
               */
              if (pinfo->index != CAM_UNQUEUED_INDEX) {
                      /* Simply reorder based on new priority */
                      if (new_priority < old_priority) {
                              camq_change_priority(queue, pinfo->index,
                                                   new_priority);
                              CAM_DEBUG_PRINT(CAM_DEBUG_XPT,
                                              ("changed priority to %d\n",
                                               new_priority));
                              retval = 1;
                      } else
                              retval = 0;
              } else {
                      /* New entry on the queue */
 3162                 if (new_priority < old_priority)
                              pinfo->priority = new_priority;
      
                      CAM_DEBUG_PRINT(CAM_DEBUG_XPT,
                                      ("Inserting onto queue\n"));
                      pinfo->generation = ++queue->generation;
                      camq_insert(queue, pinfo);
                      retval = 1;
              }
              return (retval);
      }
      
      static void
      xpt_run_allocq_task(void *context, int pending)
      {
              struct cam_periph *periph = context;
      
              cam_periph_lock(periph);
              periph->flags &= ~CAM_PERIPH_RUN_TASK;
              xpt_run_allocq(periph, 1);
              cam_periph_unlock(periph);
              cam_periph_release(periph);
      }
      
/*
 * Allocate CCBs for a peripheral and deliver them, either to a
 * sleeping cam_periph_getccb() (immediate priority) or via the
 * periph's periph_start() callback (scheduled priority).  Runs until
 * no priority is pending or the opening allotment is exhausted.
 * Caller must hold the periph lock.
 */
static void
xpt_run_allocq(struct cam_periph *periph, int sleep)
{
	struct cam_ed	*device;
	union ccb	*ccb;
	uint32_t	 prio;

	cam_periph_assert(periph, MA_OWNED);
	/* Reentrancy guard: only one allocator runs per periph. */
	if (periph->periph_allocating)
		return;
	/* Hold a reference for the duration of the loop. */
	cam_periph_doacquire(periph);
	periph->periph_allocating = 1;
	CAM_DEBUG_PRINT(CAM_DEBUG_XPT, ("xpt_run_allocq(%p)\n", periph));
	device = periph->path->device;
	ccb = NULL;
restart:
	/*
	 * Keep handing out CCBs while some priority is pending and
	 * either the periph is under its opening allotment or the
	 * request is out-of-band (prio <= CAM_PRIORITY_OOB), which is
	 * allowed to exceed it.  A CCB obtained on a previous pass
	 * (ccb != NULL) does not count against the allotment twice.
	 */
	while ((prio = min(periph->scheduled_priority,
	    periph->immediate_priority)) != CAM_PRIORITY_NONE &&
	    (periph->periph_allocated - (ccb != NULL ? 1 : 0) <
	     device->ccbq.total_openings || prio <= CAM_PRIORITY_OOB)) {
		if (ccb == NULL &&
		    (ccb = xpt_get_ccb_nowait(periph)) == NULL) {
			if (sleep) {
				ccb = xpt_get_ccb(periph);
				goto restart;
			}
			/*
			 * Can't sleep here: defer to the taskqueue
			 * unless a run task is already pending.  The
			 * extra reference is dropped by the task.
			 */
			if (periph->flags & CAM_PERIPH_RUN_TASK)
				break;
			cam_periph_doacquire(periph);
			periph->flags |= CAM_PERIPH_RUN_TASK;
			taskqueue_enqueue(xsoftc.xpt_taskq,
			    &periph->periph_run_task);
			break;
		}
		xpt_setup_ccb(&ccb->ccb_h, periph->path, prio);
		if (prio == periph->immediate_priority) {
			/* Hand the CCB to a sleeping cam_periph_getccb(). */
			periph->immediate_priority = CAM_PRIORITY_NONE;
			CAM_DEBUG_PRINT(CAM_DEBUG_XPT,
					("waking cam_periph_getccb()\n"));
			SLIST_INSERT_HEAD(&periph->ccb_list, &ccb->ccb_h,
					  periph_links.sle);
			wakeup(&periph->ccb_list);
		} else {
			/* Deliver via the periph's start routine. */
			periph->scheduled_priority = CAM_PRIORITY_NONE;
			CAM_DEBUG_PRINT(CAM_DEBUG_XPT,
					("calling periph_start()\n"));
			periph->periph_start(periph, ccb);
		}
		ccb = NULL;
	}
	/* Return an unused CCB obtained on the last pass. */
	if (ccb != NULL)
		xpt_release_ccb(ccb);
	periph->periph_allocating = 0;
	cam_periph_release_locked(periph);
}
      
/*
 * Drain the devq's send queue, dispatching the head CCB of each
 * scheduled device to its SIM.  Caller holds devq->send_mtx; the
 * loop drops it around each SIM call and re-takes it afterwards.
 */
static void
xpt_run_devq(struct cam_devq *devq)
{
	struct mtx *mtx;

	CAM_DEBUG_PRINT(CAM_DEBUG_XPT, ("xpt_run_devq\n"));

	/*
	 * Freeze the send queue while draining so a recursive entry
	 * (qfrozen_cnt would then exceed 1) stops the inner loop;
	 * this also honors freezes placed by anyone else.
	 */
	devq->send_queue.qfrozen_cnt++;
	while ((devq->send_queue.entries > 0)
	    && (devq->send_openings > 0)
	    && (devq->send_queue.qfrozen_cnt <= 1)) {
		struct	cam_ed *device;
		union ccb *work_ccb;
		struct	cam_sim *sim;
		struct xpt_proto *proto;

		device = (struct cam_ed *)camq_remove(&devq->send_queue,
							   CAMQ_HEAD);
		CAM_DEBUG_PRINT(CAM_DEBUG_XPT,
				("running device %p\n", device));

		work_ccb = cam_ccbq_peek_ccb(&device->ccbq, CAMQ_HEAD);
		if (work_ccb == NULL) {
			printf("device on run queue with no ccbs???\n");
			continue;
		}

		/*
		 * High-power commands are rationed through a global
		 * slot count protected by xpt_highpower_lock.
		 */
		if ((work_ccb->ccb_h.flags & CAM_HIGH_POWER) != 0) {
			mtx_lock(&xsoftc.xpt_highpower_lock);
			if (xsoftc.num_highpower <= 0) {
				/*
				 * We got a high power command, but we
				 * don't have any available slots.  Freeze
				 * the device queue until we have a slot
				 * available.
				 */
				xpt_freeze_devq_device(device, 1);
				STAILQ_INSERT_TAIL(&xsoftc.highpowerq, device,
						   highpowerq_entry);

				mtx_unlock(&xsoftc.xpt_highpower_lock);
				continue;
			} else {
				/*
				 * Consume a high power slot while
				 * this ccb runs.
				 */
				xsoftc.num_highpower--;
			}
			mtx_unlock(&xsoftc.xpt_highpower_lock);
		}
		cam_ccbq_remove_ccb(&device->ccbq, work_ccb);
		cam_ccbq_send_ccb(&device->ccbq, work_ccb);
		devq->send_openings--;
		devq->send_active++;
		/* Requeue the device if it still has work to send. */
		xpt_schedule_devq(devq, device);
		mtx_unlock(&devq->send_mtx);

		if ((work_ccb->ccb_h.flags & CAM_DEV_QFREEZE) != 0) {
			/*
			 * The client wants to freeze the queue
			 * after this CCB is sent.
			 */
			xpt_freeze_devq(work_ccb->ccb_h.path, 1);
		}

		/* In Target mode, the peripheral driver knows best... */
		if (work_ccb->ccb_h.func_code == XPT_SCSI_IO) {
			if ((device->inq_flags & SID_CmdQue) != 0
			 && work_ccb->csio.tag_action != CAM_TAG_ACTION_NONE)
				work_ccb->ccb_h.flags |= CAM_TAG_ACTION_VALID;
			else
				/*
				 * Clear this in case of a retried CCB that
				 * failed due to a rejected tag.
				 */
				work_ccb->ccb_h.flags &= ~CAM_TAG_ACTION_VALID;
		}

		KASSERT(device == work_ccb->ccb_h.path->device,
		    ("device (%p) / path->device (%p) mismatch",
			device, work_ccb->ccb_h.path->device));
		proto = xpt_proto_find(device->protocol);
		if (proto && proto->ops->debug_out)
			proto->ops->debug_out(work_ccb);

		/*
		 * Device queues can be shared among multiple SIM instances
		 * that reside on different buses.  Use the SIM from the
		 * queued device, rather than the one from the calling bus.
		 */
		sim = device->sim;
		mtx = sim->mtx;
		/* Take the SIM lock only if it exists and isn't held. */
		if (mtx && !mtx_owned(mtx))
			mtx_lock(mtx);
		else
			mtx = NULL;
		work_ccb->ccb_h.qos.periph_data = cam_iosched_now();
		(*(sim->sim_action))(sim, work_ccb);
		if (mtx)
			mtx_unlock(mtx);
		mtx_lock(&devq->send_mtx);
	}
	devq->send_queue.qfrozen_cnt--;
}
      
      /*
       * This function merges stuff from the src ccb into the dst ccb, while keeping
       * important fields in the dst ccb constant.
       */
      void
      xpt_merge_ccb(union ccb *dst_ccb, union ccb *src_ccb)
      {
      
              /*
               * Pull fields that are valid for peripheral drivers to set
               * into the dst CCB along with the CCB "payload".
               */
              dst_ccb->ccb_h.retry_count = src_ccb->ccb_h.retry_count;
              dst_ccb->ccb_h.func_code = src_ccb->ccb_h.func_code;
              dst_ccb->ccb_h.timeout = src_ccb->ccb_h.timeout;
              dst_ccb->ccb_h.flags = src_ccb->ccb_h.flags;
              bcopy(&(&src_ccb->ccb_h)[1], &(&dst_ccb->ccb_h)[1],
                    sizeof(union ccb) - sizeof(struct ccb_hdr));
      }
      
      void
      xpt_setup_ccb_flags(struct ccb_hdr *ccb_h, struct cam_path *path,
                          u_int32_t priority, u_int32_t flags)
      {
      
              CAM_DEBUG(path, CAM_DEBUG_TRACE, ("xpt_setup_ccb\n"));
              ccb_h->pinfo.priority = priority;
              ccb_h->path = path;
              ccb_h->path_id = path->bus->path_id;
              if (path->target)
 3162                 ccb_h->target_id = path->target->target_id;
              else
                      ccb_h->target_id = CAM_TARGET_WILDCARD;
              if (path->device) {
 3162                 ccb_h->target_lun = path->device->lun_id;
                      ccb_h->pinfo.generation = ++path->device->ccbq.queue.generation;
              } else {
                      ccb_h->target_lun = CAM_TARGET_WILDCARD;
              }
              ccb_h->pinfo.index = CAM_UNQUEUED_INDEX;
              ccb_h->flags = flags;
              ccb_h->xflags = 0;
      }
      
/*
 * Initialize a CCB header for the given path and priority with no
 * extra flags; thin wrapper around xpt_setup_ccb_flags().
 */
void
xpt_setup_ccb(struct ccb_hdr *ccb_h, struct cam_path *path, u_int32_t priority)
{
	xpt_setup_ccb_flags(ccb_h, path, priority, /*flags*/ 0);
}
      
      /* Path manipulation functions */
      cam_status
      xpt_create_path(struct cam_path **new_path_ptr, struct cam_periph *perph,
                      path_id_t path_id, target_id_t target_id, lun_id_t lun_id)
      {
              struct           cam_path *path;
              cam_status status;
      
              path = (struct cam_path *)malloc(sizeof(*path), M_CAMPATH, M_NOWAIT);
      
              if (path == NULL) {
                      status = CAM_RESRC_UNAVAIL;
                      return(status);
              }
              status = xpt_compile_path(path, perph, path_id, target_id, lun_id);
              if (status != CAM_REQ_CMP) {
                      free(path, M_CAMPATH);
                      path = NULL;
              }
              *new_path_ptr = path;
              return (status);
      }
      
/*
 * Forwards directly to xpt_create_path() with no additional locking
 * or behavior.  NOTE(review): presumably retained for compatibility
 * with callers that historically needed an unlocked variant.
 */
cam_status
xpt_create_path_unlocked(struct cam_path **new_path_ptr,
			 struct cam_periph *periph, path_id_t path_id,
			 target_id_t target_id, lun_id_t lun_id)
{

	return (xpt_create_path(new_path_ptr, periph, path_id, target_id,
	    lun_id));
}
      
/*
 * Fill in an already-allocated path for the given bus/target/lun,
 * creating target and device EDT nodes as needed.  On success the
 * path's component pointers are set; on failure new_path is left
 * untouched and any component references obtained are dropped.
 */
cam_status
xpt_compile_path(struct cam_path *new_path, struct cam_periph *perph,
		 path_id_t path_id, target_id_t target_id, lun_id_t lun_id)
{
	struct	     cam_eb *bus;
	struct	     cam_et *target;
	struct	     cam_ed *device;
	cam_status   status;

	status = CAM_REQ_CMP;	/* Completed without error */
	target = NULL;		/* Wildcarded */
	device = NULL;		/* Wildcarded */

	/*
	 * We may create new target/device EDT entries below, so take
	 * the bus-list lock and the per-bus eb_mtx that protect them.
	 */
	bus = xpt_find_bus(path_id);
	if (bus == NULL) {
		status = CAM_PATH_INVALID;
	} else {
		xpt_lock_buses();
		mtx_lock(&bus->eb_mtx);
		target = xpt_find_target(bus, target_id);
		if (target == NULL) {
			/* Create one */
			struct cam_et *new_target;

			new_target = xpt_alloc_target(bus, target_id);
			if (new_target == NULL) {
				status = CAM_RESRC_UNAVAIL;
			} else {
				target = new_target;
			}
		}
		/* Bus-list lock is only needed for target creation. */
		xpt_unlock_buses();
		if (target != NULL) {
			device = xpt_find_device(target, lun_id);
			if (device == NULL) {
				/* Create one, via the transport's allocator. */
				struct cam_ed *new_device;

				new_device =
				    (*(bus->xport->ops->alloc_device))(bus,
								       target,
								       lun_id);
				if (new_device == NULL) {
					status = CAM_RESRC_UNAVAIL;
				} else {
					device = new_device;
				}
			}
		}
		mtx_unlock(&bus->eb_mtx);
	}

	/*
	 * Only touch the user's data if we are successful.
	 */
	if (status == CAM_REQ_CMP) {
		new_path->periph = perph;
		new_path->bus = bus;
		new_path->target = target;
		new_path->device = device;
		CAM_DEBUG(new_path, CAM_DEBUG_TRACE, ("xpt_compile_path\n"));
	} else {
		/* Back out whatever references we did obtain. */
		if (device != NULL)
			xpt_release_device(device);
		if (target != NULL)
			xpt_release_target(target);
		if (bus != NULL)
			xpt_release_bus(bus);
	}
	return (status);
}
      
      cam_status
      xpt_clone_path(struct cam_path **new_path_ptr, struct cam_path *path)
      {
              struct           cam_path *new_path;
      
              new_path = (struct cam_path *)malloc(sizeof(*path), M_CAMPATH, M_NOWAIT);
              if (new_path == NULL)
                      return(CAM_RESRC_UNAVAIL);
              *new_path = *path;
              if (path->bus != NULL)
                      xpt_acquire_bus(path->bus);
              if (path->target != NULL)
                      xpt_acquire_target(path->target);
              if (path->device != NULL)
                      xpt_acquire_device(path->device);
              *new_path_ptr = new_path;
              return (CAM_REQ_CMP);
      }
      
/*
 * Drop the references a path holds on its device, target, and bus,
 * NULLing each pointer so a second release is a no-op.  The path
 * structure itself is not freed (see xpt_free_path()).
 */
void
xpt_release_path(struct cam_path *path)
{
	CAM_DEBUG(path, CAM_DEBUG_TRACE, ("xpt_release_path\n"));
	if (path->device != NULL) {
		xpt_release_device(path->device);
		path->device = NULL;
	}
	if (path->target != NULL) {
		xpt_release_target(path->target);
		path->target = NULL;
	}
	if (path->bus != NULL) {
		xpt_release_bus(path->bus);
		path->bus = NULL;
	}
}
      
      void
      xpt_free_path(struct cam_path *path)
      {
      
              CAM_DEBUG(path, CAM_DEBUG_TRACE, ("xpt_free_path\n"));
              xpt_release_path(path);
              free(path, M_CAMPATH);
      }
      
      void
      xpt_path_counts(struct cam_path *path, uint32_t *bus_ref,
          uint32_t *periph_ref, uint32_t *target_ref, uint32_t *device_ref)
      {
      
              xpt_lock_buses();
              if (bus_ref) {
                      if (path->bus)
                              *bus_ref = path->bus->refcount;
                      else
                              *bus_ref = 0;
              }
              if (periph_ref) {
                      if (path->periph)
                              *periph_ref = path->periph->refcount;
                      else
                              *periph_ref = 0;
              }
              xpt_unlock_buses();
              if (target_ref) {
                      if (path->target)
                              *target_ref = path->target->refcount;
                      else
                              *target_ref = 0;
              }
              if (device_ref) {
                      if (path->device)
                              *device_ref = path->device->refcount;
                      else
                              *device_ref = 0;
              }
      }
      
      /*
       * Return -1 for failure, 0 for exact match, 1 for match with wildcards
       * in path1, 2 for match with wildcards in path2.
       */
      int
      xpt_path_comp(struct cam_path *path1, struct cam_path *path2)
      {
              int retval = 0;
      
              if (path1->bus != path2->bus) {
                      if (path1->bus->path_id == CAM_BUS_WILDCARD)
                              retval = 1;
                      else if (path2->bus->path_id == CAM_BUS_WILDCARD)
                              retval = 2;
                      else
                              return (-1);
              }
              if (path1->target != path2->target) {
                      if (path1->target->target_id == CAM_TARGET_WILDCARD) {
                              if (retval == 0)
                                      retval = 1;
                      } else if (path2->target->target_id == CAM_TARGET_WILDCARD)
                              retval = 2;
                      else
                              return (-1);
              }
              if (path1->device != path2->device) {
                      if (path1->device->lun_id == CAM_LUN_WILDCARD) {
                              if (retval == 0)
                                      retval = 1;
                      } else if (path2->device->lun_id == CAM_LUN_WILDCARD)
                              retval = 2;
                      else
                              return (-1);
              }
              return (retval);
      }
      
      int
      xpt_path_comp_dev(struct cam_path *path, struct cam_ed *dev)
      {
              int retval = 0;
      
              if (path->bus != dev->target->bus) {
                      if (path->bus->path_id == CAM_BUS_WILDCARD)
                              retval = 1;
                      else if (dev->target->bus->path_id == CAM_BUS_WILDCARD)
                              retval = 2;
                      else
                              return (-1);
              }
              if (path->target != dev->target) {
                      if (path->target->target_id == CAM_TARGET_WILDCARD) {
                              if (retval == 0)
                                      retval = 1;
                      } else if (dev->target->target_id == CAM_TARGET_WILDCARD)
                              retval = 2;
                      else
                              return (-1);
              }
              if (path->device != dev) {
                      if (path->device->lun_id == CAM_LUN_WILDCARD) {
                              if (retval == 0)
                                      retval = 1;
                      } else if (dev->lun_id == CAM_LUN_WILDCARD)
                              retval = 2;
                      else
                              return (-1);
              }
              return (retval);
      }
      
      void
      xpt_print_path(struct cam_path *path)
      {
              struct sbuf sb;
              char buffer[XPT_PRINT_LEN];
      
              sbuf_new(&sb, buffer, XPT_PRINT_LEN, SBUF_FIXEDLEN);
              xpt_path_sbuf(path, &sb);
              sbuf_finish(&sb);
              printf("%s", sbuf_data(&sb));
              sbuf_delete(&sb);
      }
      
      void
      xpt_print_device(struct cam_ed *device)
      {
      
              if (device == NULL)
                      printf("(nopath): ");
              else {
                      printf("(noperiph:%s%d:%d:%d:%jx): ", device->sim->sim_name,
                             device->sim->unit_number,
                             device->sim->bus_id,
                             device->target->target_id,
                             (uintmax_t)device->lun_id);
              }
      }
      
      void
      xpt_print(struct cam_path *path, const char *fmt, ...)
      {
              va_list ap;
              struct sbuf sb;
              char buffer[XPT_PRINT_LEN];
      
              sbuf_new(&sb, buffer, XPT_PRINT_LEN, SBUF_FIXEDLEN);
      
              xpt_path_sbuf(path, &sb);
              va_start(ap, fmt);
              sbuf_vprintf(&sb, fmt, ap);
              va_end(ap);
      
              sbuf_finish(&sb);
              printf("%s", sbuf_data(&sb));
              sbuf_delete(&sb);
      }
      
      int
      xpt_path_string(struct cam_path *path, char *str, size_t str_len)
      {
              struct sbuf sb;
              int len;
      
              sbuf_new(&sb, str, str_len, 0);
              len = xpt_path_sbuf(path, &sb);
              sbuf_finish(&sb);
              return (len);
      }
      
      int
      xpt_path_sbuf(struct cam_path *path, struct sbuf *sb)
      {
      
              if (path == NULL)
                      sbuf_printf(sb, "(nopath): ");
              else {
                      if (path->periph != NULL)
                              sbuf_printf(sb, "(%s%d:", path->periph->periph_name,
                                          path->periph->unit_number);
                      else
                              sbuf_printf(sb, "(noperiph:");
      
                      if (path->bus != NULL)
                              sbuf_printf(sb, "%s%d:%d:", path->bus->sim->sim_name,
                                          path->bus->sim->unit_number,
                                          path->bus->sim->bus_id);
                      else
                              sbuf_printf(sb, "nobus:");
      
                      if (path->target != NULL)
                              sbuf_printf(sb, "%d:", path->target->target_id);
                      else
                              sbuf_printf(sb, "X:");
      
                      if (path->device != NULL)
                              sbuf_printf(sb, "%jx): ",
                                  (uintmax_t)path->device->lun_id);
                      else
                              sbuf_printf(sb, "X): ");
              }
      
              return(sbuf_len(sb));
      }
      
      path_id_t
      xpt_path_path_id(struct cam_path *path)
      {
              return(path->bus->path_id);
      }
      
      target_id_t
      xpt_path_target_id(struct cam_path *path)
      {
              if (path->target != NULL)
                      return (path->target->target_id);
              else
                      return (CAM_TARGET_WILDCARD);
      }
      
      lun_id_t
      xpt_path_lun_id(struct cam_path *path)
      {
              if (path->device != NULL)
                      return (path->device->lun_id);
              else
                      return (CAM_LUN_WILDCARD);
      }
      
      struct cam_sim *
      xpt_path_sim(struct cam_path *path)
      {
      
              return (path->bus->sim);
      }
      
      struct cam_periph*
      xpt_path_periph(struct cam_path *path)
      {
      
              return (path->periph);
      }
      
/*
 * Release a CAM control block for the caller.  Remit the cost of the structure
 * to the device referenced by the path.  If this device had no 'credits'
 * and peripheral drivers have registered async callbacks for this notification
 * call them now.
 */
void
xpt_release_ccb(union ccb *free_ccb)
{
	struct	 cam_ed *device;
	struct	 cam_periph *periph;

	CAM_DEBUG_PRINT(CAM_DEBUG_XPT, ("xpt_release_ccb\n"));
	/* Caller must hold the path (device) lock across the release. */
	xpt_path_assert(free_ccb->ccb_h.path, MA_OWNED);
	/* Capture device/periph before the ccb (and its path ref) is freed. */
	device = free_ccb->ccb_h.path->device;
	periph = free_ccb->ccb_h.path->periph;

	xpt_free_ccb(free_ccb);
	/* Return the slot and give the periph a chance to allocate again. */
	periph->periph_allocated--;
	cam_ccbq_release_opening(&device->ccbq);
	xpt_run_allocq(periph, 0);
}
      
      /* Functions accessed by SIM drivers */
      
/*
 * Fallback transport, installed on a newly registered bus until a
 * PATH_INQ identifies the real transport (see xpt_bus_register()).
 * Registered in the xport set so XPORT_UNKNOWN also resolves to it.
 */
static struct xpt_xport_ops xport_default_ops = {
	.alloc_device = xpt_alloc_device_default,
	.action = xpt_action_default,
	.async = xpt_dev_async_default,
};
static struct xpt_xport xport_default = {
	.xport = XPORT_UNKNOWN,
	.name = "unknown",
	.ops = &xport_default_ops,
};

CAM_XPT_XPORT(xport_default);
      
      /*
       * A sim structure, listing the SIM entry points and instance
       * identification info is passed to xpt_bus_register to hook the SIM
       * into the CAM framework.  xpt_bus_register creates a cam_eb entry
       * for this new bus and places it in the array of buses and assigns
       * it a path_id.  The path_id may be influenced by "hard wiring"
       * information specified by the user.  Once interrupt services are
       * available, the bus will be probed.
       */
      int32_t
      xpt_bus_register(struct cam_sim *sim, device_t parent, u_int32_t bus)
      {
              struct cam_eb *new_bus;
              struct cam_eb *old_bus;
              struct ccb_pathinq cpi;
              struct cam_path *path;
              cam_status status;
      
              sim->bus_id = bus;
              new_bus = (struct cam_eb *)malloc(sizeof(*new_bus),
                                                M_CAMXPT, M_NOWAIT|M_ZERO);
              if (new_bus == NULL) {
                      /* Couldn't satisfy request */
                      return (CAM_RESRC_UNAVAIL);
              }
      
              mtx_init(&new_bus->eb_mtx, "CAM bus lock", NULL, MTX_DEF);
              TAILQ_INIT(&new_bus->et_entries);
              cam_sim_hold(sim);
              new_bus->sim = sim;
              timevalclear(&new_bus->last_reset);
              new_bus->flags = 0;
              new_bus->refcount = 1;        /* Held until a bus_deregister event */
              new_bus->generation = 0;
      
              xpt_lock_buses();
              sim->path_id = new_bus->path_id =
                  xptpathid(sim->sim_name, sim->unit_number, sim->bus_id);
              old_bus = TAILQ_FIRST(&xsoftc.xpt_busses);
              while (old_bus != NULL
                  && old_bus->path_id < new_bus->path_id)
                      old_bus = TAILQ_NEXT(old_bus, links);
              if (old_bus != NULL)
                      TAILQ_INSERT_BEFORE(old_bus, new_bus, links);
              else
                      TAILQ_INSERT_TAIL(&xsoftc.xpt_busses, new_bus, links);
              xsoftc.bus_generation++;
              xpt_unlock_buses();
      
              /*
               * Set a default transport so that a PATH_INQ can be issued to
               * the SIM.  This will then allow for probing and attaching of
               * a more appropriate transport.
               */
              new_bus->xport = &xport_default;
      
              status = xpt_create_path(&path, /*periph*/NULL, sim->path_id,
                                        CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD);
              if (status != CAM_REQ_CMP) {
                      xpt_release_bus(new_bus);
                      return (CAM_RESRC_UNAVAIL);
              }
      
              xpt_path_inq(&cpi, path);
      
              if (cpi.ccb_h.status == CAM_REQ_CMP) {
                      struct xpt_xport **xpt;
      
                      SET_FOREACH(xpt, cam_xpt_xport_set) {
                              if ((*xpt)->xport == cpi.transport) {
                                      new_bus->xport = *xpt;
                                      break;
                              }
                      }
                      if (new_bus->xport == NULL) {
                              xpt_print(path,
                                  "No transport found for %d\n", cpi.transport);
                              xpt_release_bus(new_bus);
                              free(path, M_CAMXPT);
                              return (CAM_RESRC_UNAVAIL);
                      }
              }
      
              /* Notify interested parties */
              if (sim->path_id != CAM_XPT_PATH_ID) {
                      xpt_async(AC_PATH_REGISTERED, path, &cpi);
                      if ((cpi.hba_misc & PIM_NOSCAN) == 0) {
                              union        ccb *scan_ccb;
      
                              /* Initiate bus rescan. */
                              scan_ccb = xpt_alloc_ccb_nowait();
                              if (scan_ccb != NULL) {
                                      scan_ccb->ccb_h.path = path;
                                      scan_ccb->ccb_h.func_code = XPT_SCAN_BUS;
                                      scan_ccb->crcn.flags = 0;
                                      xpt_rescan(scan_ccb);
                              } else {
                                      xpt_print(path,
                                                "Can't allocate CCB to scan bus\n");
                                      xpt_free_path(path);
                              }
                      } else
                              xpt_free_path(path);
              } else
                      xpt_free_path(path);
              return (CAM_SUCCESS);
      }
      
/*
 * Undo xpt_bus_register(): announce the loss of the bus's devices and
 * the path itself to interested clients, then drop the bus reference
 * taken at registration time.  Uses a stack-compiled wildcard path.
 */
int32_t
xpt_bus_deregister(path_id_t pathid)
{
	struct cam_path bus_path;
	cam_status status;

	status = xpt_compile_path(&bus_path, NULL, pathid,
				  CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD);
	if (status != CAM_REQ_CMP)
		return (status);

	xpt_async(AC_LOST_DEVICE, &bus_path, NULL);
	xpt_async(AC_PATH_DEREGISTERED, &bus_path, NULL);

	/* Release the reference count held while registered. */
	xpt_release_bus(bus_path.bus);
	xpt_release_path(&bus_path);

	return (CAM_REQ_CMP);
}
      
      static path_id_t
      xptnextfreepathid(void)
      {
              struct cam_eb *bus;
              path_id_t pathid;
              const char *strval;
      
              mtx_assert(&xsoftc.xpt_topo_lock, MA_OWNED);
              pathid = 0;
              bus = TAILQ_FIRST(&xsoftc.xpt_busses);
      retry:
              /* Find an unoccupied pathid */
              while (bus != NULL && bus->path_id <= pathid) {
                      if (bus->path_id == pathid)
                              pathid++;
                      bus = TAILQ_NEXT(bus, links);
              }
      
              /*
               * Ensure that this pathid is not reserved for
               * a bus that may be registered in the future.
               */
              if (resource_string_value("scbus", pathid, "at", &strval) == 0) {
                      ++pathid;
                      /* Start the search over */
                      goto retry;
              }
              return (pathid);
      }
      
/*
 * Determine the path id for a SIM instance, honoring "hard wiring"
 * hints of the form "scbus<n> at <sim><unit> bus <b>".  Falls back to
 * the next free path id when no hint matches.
 */
static path_id_t
xptpathid(const char *sim_name, int sim_unit, int sim_bus)
{
	path_id_t pathid;
	int i, dunit, val;
	char buf[32];
	const char *dname;

	pathid = CAM_XPT_PATH_ID;
	snprintf(buf, sizeof(buf), "%s%d", sim_name, sim_unit);
	/* The transport layer itself always occupies xpt0 / bus 0. */
	if (strcmp(buf, "xpt0") == 0 && sim_bus == 0)
		return (pathid);
	i = 0;
	/* Walk every hint of the form "<dname><dunit> at <sim><unit>". */
	while ((resource_find_match(&i, &dname, &dunit, "at", buf)) == 0) {
		if (strcmp(dname, "scbus")) {
			/* Avoid a bit of foot shooting. */
			continue;
		}
		if (dunit < 0)		/* unwired?! */
			continue;
		if (resource_int_value("scbus", dunit, "bus", &val) == 0) {
			if (sim_bus == val) {
				pathid = dunit;
				break;
			}
		} else if (sim_bus == 0) {
			/* Unspecified matches bus 0 */
			pathid = dunit;
			break;
		} else {
			printf("Ambiguous scbus configuration for %s%d "
			       "bus %d, cannot wire down.  The kernel "
			       "config entry for scbus%d should "
			       "specify a controller bus.\n"
			       "Scbus will be assigned dynamically.\n",
			       sim_name, sim_unit, sim_bus, dunit);
			break;
		}
	}

	/* No hint claimed this SIM; allocate the id dynamically. */
	if (pathid == CAM_XPT_PATH_ID)
		pathid = xptnextfreepathid();
	return (pathid);
}
      
      static const char *
      xpt_async_string(u_int32_t async_code)
      {
      
              switch (async_code) {
              case AC_BUS_RESET: return ("AC_BUS_RESET");
              case AC_UNSOL_RESEL: return ("AC_UNSOL_RESEL");
              case AC_SCSI_AEN: return ("AC_SCSI_AEN");
              case AC_SENT_BDR: return ("AC_SENT_BDR");
              case AC_PATH_REGISTERED: return ("AC_PATH_REGISTERED");
              case AC_PATH_DEREGISTERED: return ("AC_PATH_DEREGISTERED");
              case AC_FOUND_DEVICE: return ("AC_FOUND_DEVICE");
              case AC_LOST_DEVICE: return ("AC_LOST_DEVICE");
              case AC_TRANSFER_NEG: return ("AC_TRANSFER_NEG");
              case AC_INQ_CHANGED: return ("AC_INQ_CHANGED");
              case AC_GETDEV_CHANGED: return ("AC_GETDEV_CHANGED");
              case AC_CONTRACT: return ("AC_CONTRACT");
              case AC_ADVINFO_CHANGED: return ("AC_ADVINFO_CHANGED");
              case AC_UNIT_ATTENTION: return ("AC_UNIT_ATTENTION");
              }
              return ("AC_UNKNOWN");
      }
      
      static int
      xpt_async_size(u_int32_t async_code)
      {
      
              switch (async_code) {
              case AC_BUS_RESET: return (0);
              case AC_UNSOL_RESEL: return (0);
              case AC_SCSI_AEN: return (0);
              case AC_SENT_BDR: return (0);
              case AC_PATH_REGISTERED: return (sizeof(struct ccb_pathinq));
              case AC_PATH_DEREGISTERED: return (0);
              case AC_FOUND_DEVICE: return (sizeof(struct ccb_getdev));
              case AC_LOST_DEVICE: return (0);
              case AC_TRANSFER_NEG: return (sizeof(struct ccb_trans_settings));
              case AC_INQ_CHANGED: return (0);
              case AC_GETDEV_CHANGED: return (0);
              case AC_CONTRACT: return (sizeof(struct ac_contract));
              case AC_ADVINFO_CHANGED: return (-1);
              case AC_UNIT_ATTENTION: return (sizeof(struct ccb_scsiio));
              }
              return (0);
      }
      
/*
 * Per-device worker for xpt_async_process(): deliver one queued async
 * event to a single device, honoring LUN wildcards on either side.
 * Entered with the device mutex held (via xptdevicetraverse()); always
 * returns 1 so the traversal continues.
 */
static int
xpt_async_process_dev(struct cam_ed *device, void *arg)
{
	union ccb *ccb = arg;
	struct cam_path *path = ccb->ccb_h.path;
	void *async_arg = ccb->casync.async_arg_ptr;
	u_int32_t async_code = ccb->casync.async_code;
	int relock;

	/* Skip devices the event's path does not address. */
	if (path->device != device
	 && path->device->lun_id != CAM_LUN_WILDCARD
	 && device->lun_id != CAM_LUN_WILDCARD)
		return (1);

	/*
	 * The async callback could free the device.
	 * If it is a broadcast async, it doesn't hold
	 * device reference, so take our own reference.
	 */
	xpt_acquire_device(device);

	/*
	 * If async for specific device is to be delivered to
	 * the wildcard client, take the specific device lock.
	 * XXX: We may need a way for client to specify it.
	 */
	if ((device->lun_id == CAM_LUN_WILDCARD &&
	     path->device->lun_id != CAM_LUN_WILDCARD) ||
	    (device->target->target_id == CAM_TARGET_WILDCARD &&
	     path->target->target_id != CAM_TARGET_WILDCARD) ||
	    (device->target->bus->path_id == CAM_BUS_WILDCARD &&
	     path->target->bus->path_id != CAM_BUS_WILDCARD)) {
		mtx_unlock(&device->device_mtx);
		xpt_path_lock(path);
		relock = 1;
	} else
		relock = 0;

	/* Give the transport first crack, then broadcast to registrants. */
	(*(device->target->bus->xport->ops->async))(async_code,
	    device->target->bus, device->target, device, async_arg);
	xpt_async_bcast(&device->asyncs, async_code, path, async_arg);

	if (relock) {
		xpt_path_unlock(path);
		mtx_lock(&device->device_mtx);
	}
	xpt_release_device(device);
	return (1);
}
      
      static int
      xpt_async_process_tgt(struct cam_et *target, void *arg)
      {
              union ccb *ccb = arg;
              struct cam_path *path = ccb->ccb_h.path;
      
              if (path->target != target
               && path->target->target_id != CAM_TARGET_WILDCARD
               && target->target_id != CAM_TARGET_WILDCARD)
                      return (1);
      
              if (ccb->casync.async_code == AC_SENT_BDR) {
                      /* Update our notion of when the last reset occurred */
                      microtime(&target->last_reset);
              }
      
              return (xptdevicetraverse(target, NULL, xpt_async_process_dev, ccb));
      }
      
/*
 * Deferred delivery of an XPT_ASYNC ccb queued by xpt_async():
 * broadcast the event to the bus's matching targets/devices and to the
 * wildcard clients on xpt_periph's path, then undo the devq/simq
 * freeze taken in xpt_async() and free the cloned path, the copied
 * argument (if any), and the ccb itself.
 */
static void
xpt_async_process(struct cam_periph *periph, union ccb *ccb)
{
	struct cam_eb *bus;
	struct cam_path *path;
	void *async_arg;
	u_int32_t async_code;

	path = ccb->ccb_h.path;
	async_code = ccb->casync.async_code;
	async_arg = ccb->casync.async_arg_ptr;
	CAM_DEBUG(path, CAM_DEBUG_TRACE | CAM_DEBUG_INFO,
	    ("xpt_async(%s)\n", xpt_async_string(async_code)));
	bus = path->bus;

	if (async_code == AC_BUS_RESET) {
		/* Update our notion of when the last reset occurred */
		microtime(&bus->last_reset);
	}

	xpttargettraverse(bus, NULL, xpt_async_process_tgt, ccb);

	/*
	 * If this wasn't a fully wildcarded async, tell all
	 * clients that want all async events.
	 */
	if (bus != xpt_periph->path->bus) {
		xpt_path_lock(xpt_periph->path);
		xpt_async_process_dev(xpt_periph->path->device, ccb);
		xpt_path_unlock(xpt_periph->path);
	}

	/* Release the freeze that xpt_async() took before queuing us. */
	if (path->device != NULL && path->device->lun_id != CAM_LUN_WILDCARD)
		xpt_release_devq(path, 1, TRUE);
	else
		xpt_release_simq(path->bus->sim, TRUE);
	/* A positive size means xpt_async() malloc'd a private copy. */
	if (ccb->casync.async_arg_size > 0)
		free(async_arg, M_CAMXPT);
	xpt_free_path(path);
	xpt_free_ccb(ccb);
}
      
      static void
      xpt_async_bcast(struct async_list *async_head,
                      u_int32_t async_code,
                      struct cam_path *path, void *async_arg)
      {
              struct async_node *cur_entry;
              struct mtx *mtx;
      
              cur_entry = SLIST_FIRST(async_head);
              while (cur_entry != NULL) {
                      struct async_node *next_entry;
                      /*
                       * Grab the next list entry before we call the current
                       * entry's callback.  This is because the callback function
                       * can delete its async callback entry.
                       */
                      next_entry = SLIST_NEXT(cur_entry, links);
                      if ((cur_entry->event_enable & async_code) != 0) {
                              mtx = cur_entry->event_lock ?
                                  path->device->sim->mtx : NULL;
                              if (mtx)
                                      mtx_lock(mtx);
                              cur_entry->callback(cur_entry->callback_arg,
                                                  async_code, path,
                                                  async_arg);
                              if (mtx)
                                      mtx_unlock(mtx);
                      }
                      cur_entry = next_entry;
              }
      }
      
/*
 * Deliver an asynchronous event notification.  Delivery is deferred:
 * a clone of 'path' and (for codes with a known payload size) a
 * private copy of 'async_arg' are packed into an XPT_ASYNC ccb, which
 * xpt_async_process() later delivers and frees.  The device queue (or
 * SIM queue for wildcard paths) stays frozen until then.
 */
void
xpt_async(u_int32_t async_code, struct cam_path *path, void *async_arg)
{
	union ccb *ccb;
	int size;

	ccb = xpt_alloc_ccb_nowait();
	if (ccb == NULL) {
		xpt_print(path, "Can't allocate CCB to send %s\n",
		    xpt_async_string(async_code));
		return;
	}

	if (xpt_clone_path(&ccb->ccb_h.path, path) != CAM_REQ_CMP) {
		xpt_print(path, "Can't allocate path to send %s\n",
		    xpt_async_string(async_code));
		xpt_free_ccb(ccb);
		return;
	}
	ccb->ccb_h.path->periph = NULL;
	ccb->ccb_h.func_code = XPT_ASYNC;
	ccb->ccb_h.cbfcnp = xpt_async_process;
	ccb->ccb_h.flags |= CAM_UNLOCKED;
	ccb->casync.async_code = async_code;
	ccb->casync.async_arg_size = 0;
	size = xpt_async_size(async_code);
	CAM_DEBUG(ccb->ccb_h.path, CAM_DEBUG_TRACE,
	    ("xpt_async: func %#x %s aync_code %d %s\n",
		ccb->ccb_h.func_code,
		xpt_action_name(ccb->ccb_h.func_code),
		async_code,
		xpt_async_string(async_code)));
	if (size > 0 && async_arg != NULL) {
		/* Copy the payload so the caller's buffer may go away. */
		ccb->casync.async_arg_ptr = malloc(size, M_CAMXPT, M_NOWAIT);
		if (ccb->casync.async_arg_ptr == NULL) {
			xpt_print(path, "Can't allocate argument to send %s\n",
			    xpt_async_string(async_code));
			xpt_free_path(ccb->ccb_h.path);
			xpt_free_ccb(ccb);
			return;
		}
		memcpy(ccb->casync.async_arg_ptr, async_arg, size);
		ccb->casync.async_arg_size = size;
	} else if (size < 0) {
		/*
		 * Negative size: pass the caller's pointer through
		 * uncopied; xpt_async_process() will not free it.
		 */
		ccb->casync.async_arg_ptr = async_arg;
		ccb->casync.async_arg_size = size;
	}
	/* Hold off new I/O until the event has been fully processed. */
	if (path->device != NULL && path->device->lun_id != CAM_LUN_WILDCARD)
		xpt_freeze_devq(path, 1);
	else
		xpt_freeze_simq(path->bus->sim, 1);
	xpt_action(ccb);
}
      
      static void
      xpt_dev_async_default(u_int32_t async_code, struct cam_eb *bus,
                            struct cam_et *target, struct cam_ed *device,
                            void *async_arg)
      {
      
              /*
               * We only need to handle events for real devices.
               */
              if (target->target_id == CAM_TARGET_WILDCARD
               || device->lun_id == CAM_LUN_WILDCARD)
                      return;
      
              printf("%s called\n", __func__);
      }
      
/*
 * Increase the freeze count on a device's ccb queue by 'count' and,
 * if the device is currently scheduled for I/O, pull it off the SIM's
 * send queue.  Returns the new freeze count.  The devq send lock must
 * be held by the caller.
 */
static uint32_t
xpt_freeze_devq_device(struct cam_ed *dev, u_int count)
{
	struct cam_devq	*devq;
	uint32_t freeze;

	devq = dev->sim->devq;
	mtx_assert(&devq->send_mtx, MA_OWNED);
	CAM_DEBUG_DEV(dev, CAM_DEBUG_TRACE,
	    ("xpt_freeze_devq_device(%d) %u->%u\n", count,
	    dev->ccbq.queue.qfrozen_cnt, dev->ccbq.queue.qfrozen_cnt + count));
	freeze = (dev->ccbq.queue.qfrozen_cnt += count);
	/* Remove frozen device from sendq. */
	if (device_is_queued(dev))
		camq_remove(&devq->send_queue, dev->devq_entry.index);
	return (freeze);
}
      
      u_int32_t
      xpt_freeze_devq(struct cam_path *path, u_int count)
      {
              struct cam_ed        *dev = path->device;
              struct cam_devq        *devq;
              uint32_t         freeze;
      
              devq = dev->sim->devq;
              mtx_lock(&devq->send_mtx);
              CAM_DEBUG(path, CAM_DEBUG_TRACE, ("xpt_freeze_devq(%d)\n", count));
              freeze = xpt_freeze_devq_device(dev, count);
              mtx_unlock(&devq->send_mtx);
              return (freeze);
      }
      
      u_int32_t
      xpt_freeze_simq(struct cam_sim *sim, u_int count)
      {
              struct cam_devq        *devq;
              uint32_t         freeze;
      
              devq = sim->devq;
              mtx_lock(&devq->send_mtx);
              freeze = (devq->send_queue.qfrozen_cnt += count);
              mtx_unlock(&devq->send_mtx);
              return (freeze);
      }
      
      static void
      xpt_release_devq_timeout(void *arg)
      {
              struct cam_ed *dev;
              struct cam_devq *devq;
      
              dev = (struct cam_ed *)arg;
              CAM_DEBUG_DEV(dev, CAM_DEBUG_TRACE, ("xpt_release_devq_timeout\n"));
              devq = dev->sim->devq;
              mtx_assert(&devq->send_mtx, MA_OWNED);
              if (xpt_release_devq_device(dev, /*count*/1, /*run_queue*/TRUE))
                      xpt_run_devq(devq);
      }
      
      void
      xpt_release_devq(struct cam_path *path, u_int count, int run_queue)
      {
              struct cam_ed *dev;
              struct cam_devq *devq;
      
              CAM_DEBUG(path, CAM_DEBUG_TRACE, ("xpt_release_devq(%d, %d)\n",
                  count, run_queue));
              dev = path->device;
              devq = dev->sim->devq;
              mtx_lock(&devq->send_mtx);
              if (xpt_release_devq_device(dev, count, run_queue))
                      xpt_run_devq(dev->sim->devq);
              mtx_unlock(&devq->send_mtx);
      }
      
/*
 * Drop 'count' freeze counts from a device's ccb queue.  Returns the
 * caller-supplied 'run_queue' value when the queue became fully
 * unfrozen (i.e. the caller should now run the devq), or 0 otherwise.
 *
 * Must be called with the devq send_mtx held.
 */
static int
xpt_release_devq_device(struct cam_ed *dev, u_int count, int run_queue)
{

	mtx_assert(&dev->sim->devq->send_mtx, MA_OWNED);
	CAM_DEBUG_DEV(dev, CAM_DEBUG_TRACE,
	    ("xpt_release_devq_device(%d, %d) %u->%u\n", count, run_queue,
	    dev->ccbq.queue.qfrozen_cnt, dev->ccbq.queue.qfrozen_cnt - count));
	/* Clamp over-releases so the unsigned freeze count cannot wrap. */
	if (count > dev->ccbq.queue.qfrozen_cnt) {
#ifdef INVARIANTS
		printf("xpt_release_devq(): requested %u > present %u\n",
		    count, dev->ccbq.queue.qfrozen_cnt);
#endif
		count = dev->ccbq.queue.qfrozen_cnt;
	}
	dev->ccbq.queue.qfrozen_cnt -= count;
	if (dev->ccbq.queue.qfrozen_cnt == 0) {
		/*
		 * No longer need to wait for a successful
		 * command completion.
		 */
		dev->flags &= ~CAM_DEV_REL_ON_COMPLETE;
		/*
		 * Remove any timeouts that might be scheduled
		 * to release this queue.
		 */
		if ((dev->flags & CAM_DEV_REL_TIMEOUT_PENDING) != 0) {
			callout_stop(&dev->callout);
			dev->flags &= ~CAM_DEV_REL_TIMEOUT_PENDING;
		}
		/*
		 * Now that we are unfrozen schedule the
		 * device so any pending transactions are
		 * run.
		 */
		xpt_schedule_devq(dev->sim->devq, dev);
	} else
		run_queue = 0;
	return (run_queue);
}
      
      void
      xpt_release_simq(struct cam_sim *sim, int run_queue)
      {
              struct cam_devq        *devq;
      
              devq = sim->devq;
              mtx_lock(&devq->send_mtx);
              if (devq->send_queue.qfrozen_cnt <= 0) {
      #ifdef INVARIANTS
                      printf("xpt_release_simq: requested 1 > present %u\n",
                          devq->send_queue.qfrozen_cnt);
      #endif
              } else
                      devq->send_queue.qfrozen_cnt--;
              if (devq->send_queue.qfrozen_cnt == 0) {
                      /*
                       * If there is a timeout scheduled to release this
                       * sim queue, remove it.  The queue frozen count is
                       * already at 0.
                       */
                      if ((sim->flags & CAM_SIM_REL_TIMEOUT_PENDING) != 0){
                              callout_stop(&sim->callout);
                              sim->flags &= ~CAM_SIM_REL_TIMEOUT_PENDING;
                      }
                      if (run_queue) {
                              /*
                               * Now that we are unfrozen run the send queue.
                               */
                              xpt_run_devq(sim->devq);
                      }
              }
              mtx_unlock(&devq->send_mtx);
      }
      
/*
 * Queue a completed CCB for final processing by one of the CAM
 * completion threads.  Immediate (non-queued) CCBs are ignored; queued
 * CCBs are hashed by path/target/lun onto a doneq and the thread
 * servicing that queue is woken if it was sleeping on an empty queue.
 */
void
xpt_done(union ccb *done_ccb)
{
	struct cam_doneq *queue;
	int	run, hash;

#if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING)
	if (done_ccb->ccb_h.func_code == XPT_SCSI_IO &&
	    done_ccb->csio.bio != NULL)
		biotrack(done_ccb->csio.bio, __func__);
#endif

	CAM_DEBUG(done_ccb->ccb_h.path, CAM_DEBUG_TRACE,
	    ("xpt_done: func= %#x %s status %#x\n",
		done_ccb->ccb_h.func_code,
		xpt_action_name(done_ccb->ccb_h.func_code),
		done_ccb->ccb_h.status));
	/* Only queued function codes need completion-thread processing. */
	if ((done_ccb->ccb_h.func_code & XPT_FC_QUEUED) == 0)
		return;

	/* Store the time the ccb was in the sim */
	done_ccb->ccb_h.qos.periph_data = cam_iosched_delta_t(done_ccb->ccb_h.qos.periph_data);
	/* Spread completions over the doneqs by path/target/lun. */
	hash = (done_ccb->ccb_h.path_id + done_ccb->ccb_h.target_id +
	    done_ccb->ccb_h.target_lun) % cam_num_doneqs;
	queue = &cam_doneqs[hash];
	mtx_lock(&queue->cam_doneq_mtx);
	/* Wake the thread only on an empty->non-empty transition. */
	run = (queue->cam_doneq_sleep && STAILQ_EMPTY(&queue->cam_doneq));
	STAILQ_INSERT_TAIL(&queue->cam_doneq, &done_ccb->ccb_h, sim_links.stqe);
	done_ccb->ccb_h.pinfo.index = CAM_DONEQ_INDEX;
	mtx_unlock(&queue->cam_doneq_mtx);
	if (run)
		wakeup(&queue->cam_doneq);
}
      
      void
      xpt_done_direct(union ccb *done_ccb)
      {
      
              CAM_DEBUG(done_ccb->ccb_h.path, CAM_DEBUG_TRACE,
                  ("xpt_done_direct: status %#x\n", done_ccb->ccb_h.status));
              if ((done_ccb->ccb_h.func_code & XPT_FC_QUEUED) == 0)
                      return;
      
              /* Store the time the ccb was in the sim */
              done_ccb->ccb_h.qos.periph_data = cam_iosched_delta_t(done_ccb->ccb_h.qos.periph_data);
              xpt_done_process(&done_ccb->ccb_h);
      }
      
      union ccb *
      xpt_alloc_ccb(void)
      {
              union ccb *new_ccb;
      
              new_ccb = malloc(sizeof(*new_ccb), M_CAMCCB, M_ZERO|M_WAITOK);
              return (new_ccb);
      }
      
      union ccb *
      xpt_alloc_ccb_nowait(void)
      {
              union ccb *new_ccb;
      
              new_ccb = malloc(sizeof(*new_ccb), M_CAMCCB, M_ZERO|M_NOWAIT);
              return (new_ccb);
      }
      
/*
 * Free a CCB previously obtained from xpt_alloc_ccb*().
 */
void
xpt_free_ccb(union ccb *free_ccb)
{
	free(free_ccb, M_CAMCCB);
}
      
      /* Private XPT functions */
      
      /*
       * Get a CAM control block for the caller. Charge the structure to the device
       * referenced by the path.  If we don't have sufficient resources to allocate
       * more ccbs, we return NULL.
       */
      static union ccb *
      xpt_get_ccb_nowait(struct cam_periph *periph)
      {
              union ccb *new_ccb;
      
 3162         new_ccb = malloc(sizeof(*new_ccb), M_CAMCCB, M_ZERO|M_NOWAIT);
              if (new_ccb == NULL)
                      return (NULL);
              periph->periph_allocated++;
              cam_ccbq_take_opening(&periph->path->device->ccbq);
              return (new_ccb);
      }
      
      static union ccb *
      xpt_get_ccb(struct cam_periph *periph)
      {
              union ccb *new_ccb;
      
              cam_periph_unlock(periph);
              new_ccb = malloc(sizeof(*new_ccb), M_CAMCCB, M_ZERO|M_WAITOK);
              cam_periph_lock(periph);
              periph->periph_allocated++;
              cam_ccbq_take_opening(&periph->path->device->ccbq);
              return (new_ccb);
      }
      
/*
 * Return a CCB of the requested priority from the periph's ccb_list,
 * sleeping until one becomes available.  If no CCB of that priority is
 * present, lower (i.e. more urgent) priorities request an allocation
 * run; otherwise the caller sleeps until woken by a new arrival.
 *
 * Must be called with the periph lock held.
 */
union ccb *
cam_periph_getccb(struct cam_periph *periph, u_int32_t priority)
{
	struct ccb_hdr *ccb_h;

	CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("cam_periph_getccb\n"));
	cam_periph_assert(periph, MA_OWNED);
	while ((ccb_h = SLIST_FIRST(&periph->ccb_list)) == NULL ||
	    ccb_h->pinfo.priority != priority) {
		if (priority < periph->immediate_priority) {
			periph->immediate_priority = priority;
			xpt_run_allocq(periph, 0);
		} else
			cam_periph_sleep(periph, &periph->ccb_list, PRIBIO,
			    "cgticb", 0);
	}
	SLIST_REMOVE_HEAD(&periph->ccb_list, periph_links.sle);
	return ((union ccb *)ccb_h);
}
      
/*
 * Take a reference on a bus; the refcount is protected by the
 * topology (buses) lock.
 */
static void
xpt_acquire_bus(struct cam_eb *bus)
{

	xpt_lock_buses();
	bus->refcount++;
	xpt_unlock_buses();
}
      
/*
 * Drop a reference on a bus.  When the last reference goes away, the
 * bus is unlinked from the global bus list (bumping the generation so
 * iterators notice) and destroyed, releasing its SIM reference.
 */
static void
xpt_release_bus(struct cam_eb *bus)
{

	xpt_lock_buses();
	KASSERT(bus->refcount >= 1, ("bus->refcount >= 1"));
	if (--bus->refcount > 0) {
		xpt_unlock_buses();
		return;
	}
	TAILQ_REMOVE(&xsoftc.xpt_busses, bus, links);
	xsoftc.bus_generation++;
	xpt_unlock_buses();
	/* All targets hold bus references, so the list must be empty now. */
	KASSERT(TAILQ_EMPTY(&bus->et_entries),
	    ("destroying bus, but target list is not empty"));
	cam_sim_release(bus->sim);
	mtx_destroy(&bus->eb_mtx);
	free(bus, M_CAMXPT);
}
      
      static struct cam_et *
      xpt_alloc_target(struct cam_eb *bus, target_id_t target_id)
      {
              struct cam_et *cur_target, *target;
      
              mtx_assert(&xsoftc.xpt_topo_lock, MA_OWNED);
              mtx_assert(&bus->eb_mtx, MA_OWNED);
              target = (struct cam_et *)malloc(sizeof(*target), M_CAMXPT,
                                               M_NOWAIT|M_ZERO);
              if (target == NULL)
                      return (NULL);
      
              TAILQ_INIT(&target->ed_entries);
              target->bus = bus;
              target->target_id = target_id;
              target->refcount = 1;
              target->generation = 0;
              target->luns = NULL;
              mtx_init(&target->luns_mtx, "CAM LUNs lock", NULL, MTX_DEF);
              timevalclear(&target->last_reset);
              /*
               * Hold a reference to our parent bus so it
               * will not go away before we do.
               */
              bus->refcount++;
      
              /* Insertion sort into our bus's target list */
              cur_target = TAILQ_FIRST(&bus->et_entries);
              while (cur_target != NULL && cur_target->target_id < target_id)
                      cur_target = TAILQ_NEXT(cur_target, links);
              if (cur_target != NULL) {
                      TAILQ_INSERT_BEFORE(cur_target, target, links);
              } else {
                      TAILQ_INSERT_TAIL(&bus->et_entries, target, links);
              }
              bus->generation++;
              return (target);
      }
      
/*
 * Take a reference on a target; the refcount is protected by the
 * owning bus's lock.
 */
static void
xpt_acquire_target(struct cam_et *target)
{
	struct cam_eb *bus = target->bus;

	mtx_lock(&bus->eb_mtx);
	target->refcount++;
	mtx_unlock(&bus->eb_mtx);
}
      
/*
 * Drop a reference on a target.  When the last reference goes away,
 * the target is unlinked from its bus (bumping the generation so
 * iterators notice), its resources are freed, and the reference it
 * held on the bus is released.
 */
static void
xpt_release_target(struct cam_et *target)
{
	struct cam_eb *bus = target->bus;

	mtx_lock(&bus->eb_mtx);
	if (--target->refcount > 0) {
		mtx_unlock(&bus->eb_mtx);
		return;
	}
	TAILQ_REMOVE(&bus->et_entries, target, links);
	bus->generation++;
	mtx_unlock(&bus->eb_mtx);
	/* All devices hold target references, so the list must be empty. */
	KASSERT(TAILQ_EMPTY(&target->ed_entries),
	    ("destroying target, but device list is not empty"));
	xpt_release_bus(bus);
	mtx_destroy(&target->luns_mtx);
	/* free(9) tolerates NULL, but keep the original explicit check. */
	if (target->luns)
		free(target->luns, M_CAMXPT);
	free(target, M_CAMXPT);
}
      
      static struct cam_ed *
      xpt_alloc_device_default(struct cam_eb *bus, struct cam_et *target,
                               lun_id_t lun_id)
      {
              struct cam_ed *device;
      
              device = xpt_alloc_device(bus, target, lun_id);
              if (device == NULL)
                      return (NULL);
      
              device->mintags = 1;
              device->maxtags = 1;
              return (device);
      }
      
/*
 * Taskqueue handler performing the final destruction of a cam_ed
 * (queued from xpt_release_device()).  The device mutex is acquired
 * before being destroyed — presumably to drain any thread still
 * holding it before the memory is freed; confirm against mtx(9).
 */
static void
xpt_destroy_device(void *context, int pending)
{
	struct cam_ed	*device = context;

	mtx_lock(&device->device_mtx);
	mtx_destroy(&device->device_mtx);
	free(device, M_CAMDEV);
}
      
/*
 * Allocate a new device on 'target' and link it into the target's
 * device list, kept sorted by LUN id.  A slot is first reserved in the
 * SIM's device queue; on any failure NULL is returned.  The new device
 * starts with one reference and itself holds a reference on the target.
 *
 * Called with the bus lock held.
 */
struct cam_ed *
xpt_alloc_device(struct cam_eb *bus, struct cam_et *target, lun_id_t lun_id)
{
	struct cam_ed	*cur_device, *device;
	struct cam_devq	*devq;
	cam_status status;

	mtx_assert(&bus->eb_mtx, MA_OWNED);
	/* Make space for us in the device queue on our bus */
	devq = bus->sim->devq;
	mtx_lock(&devq->send_mtx);
	status = cam_devq_resize(devq, devq->send_queue.array_size + 1);
	mtx_unlock(&devq->send_mtx);
	if (status != CAM_REQ_CMP)
		return (NULL);

	device = (struct cam_ed *)malloc(sizeof(*device),
					 M_CAMDEV, M_NOWAIT|M_ZERO);
	if (device == NULL)
		return (NULL);

	cam_init_pinfo(&device->devq_entry);
	device->target = target;
	device->lun_id = lun_id;
	device->sim = bus->sim;
	if (cam_ccbq_init(&device->ccbq,
			  bus->sim->max_dev_openings) != 0) {
		free(device, M_CAMDEV);
		return (NULL);
	}
	SLIST_INIT(&device->asyncs);
	SLIST_INIT(&device->periphs);
	device->generation = 0;
	device->flags = CAM_DEV_UNCONFIGURED;
	device->tag_delay_count = 0;
	device->tag_saved_openings = 0;
	device->refcount = 1;
	/* The release callout runs under the devq send_mtx. */
	mtx_init(&device->device_mtx, "CAM device lock", NULL, MTX_DEF);
	callout_init_mtx(&device->callout, &devq->send_mtx, 0);
	TASK_INIT(&device->device_destroy_task, 0, xpt_destroy_device, device);
	/*
	 * Hold a reference to our parent target so it
	 * will not go away before we do.
	 */
	target->refcount++;

	/* Insertion sort into our target's device list, ordered by LUN. */
	cur_device = TAILQ_FIRST(&target->ed_entries);
	while (cur_device != NULL && cur_device->lun_id < lun_id)
		cur_device = TAILQ_NEXT(cur_device, links);
	if (cur_device != NULL)
		TAILQ_INSERT_BEFORE(cur_device, device, links);
	else
		TAILQ_INSERT_TAIL(&target->ed_entries, device, links);
	target->generation++;
	return (device);
}
      
/*
 * Take a reference on a device; the refcount is protected by the
 * owning bus's lock.
 */
void
xpt_acquire_device(struct cam_ed *device)
{
	struct cam_eb *bus = device->target->bus;

	mtx_lock(&bus->eb_mtx);
	device->refcount++;
	mtx_unlock(&bus->eb_mtx);
}
      
/*
 * Drop a reference on a device.  When the last reference goes away,
 * the device is unlinked from its target, its devq slot is returned,
 * its buffers are freed, and final destruction (mutex teardown and the
 * free of the cam_ed itself) is deferred to the XPT taskqueue via
 * device_destroy_task.
 */
void
xpt_release_device(struct cam_ed *device)
{
	struct cam_eb *bus = device->target->bus;
	struct cam_devq *devq;

	mtx_lock(&bus->eb_mtx);
	if (--device->refcount > 0) {
		mtx_unlock(&bus->eb_mtx);
		return;
	}

	TAILQ_REMOVE(&device->target->ed_entries, device,links);
	device->target->generation++;
	mtx_unlock(&bus->eb_mtx);

	/* Release our slot in the devq */
	devq = bus->sim->devq;
	mtx_lock(&devq->send_mtx);
	cam_devq_resize(devq, devq->send_queue.array_size - 1);

	KASSERT(SLIST_EMPTY(&device->periphs),
	    ("destroying device, but periphs list is not empty"));
	KASSERT(device->devq_entry.index == CAM_UNQUEUED_INDEX,
	    ("destroying device while still queued for ccbs"));

	/* The send_mtx must be held when accessing the callout */
	if ((device->flags & CAM_DEV_REL_TIMEOUT_PENDING) != 0)
		callout_stop(&device->callout);

	mtx_unlock(&devq->send_mtx);

	xpt_release_target(device->target);

	cam_ccbq_fini(&device->ccbq);
	/*
	 * Free allocated memory.  free(9) does nothing if the
	 * supplied pointer is NULL, so it is safe to call without
	 * checking.
	 */
	free(device->supported_vpds, M_CAMXPT);
	free(device->device_id, M_CAMXPT);
	free(device->ext_inq, M_CAMXPT);
	free(device->physpath, M_CAMXPT);
	free(device->rcap_buf, M_CAMXPT);
	free(device->serial_num, M_CAMXPT);
	free(device->nvme_data, M_CAMXPT);
	free(device->nvme_cdata, M_CAMXPT);
	/* Defer mutex destruction/free to a taskqueue thread. */
	taskqueue_enqueue(xsoftc.xpt_taskq, &device->device_destroy_task);
}
      
      u_int32_t
      xpt_dev_ccbq_resize(struct cam_path *path, int newopenings)
      {
              int        result;
              struct        cam_ed *dev;
      
              dev = path->device;
              mtx_lock(&dev->sim->devq->send_mtx);
              result = cam_ccbq_resize(&dev->ccbq, newopenings);
              mtx_unlock(&dev->sim->devq->send_mtx);
              if ((dev->flags & CAM_DEV_TAG_AFTER_COUNT) != 0
               || (dev->inq_flags & SID_CmdQue) != 0)
                      dev->tag_saved_openings = newopenings;
              return (result);
      }
      
      static struct cam_eb *
      xpt_find_bus(path_id_t path_id)
      {
              struct cam_eb *bus;
      
              xpt_lock_buses();
              for (bus = TAILQ_FIRST(&xsoftc.xpt_busses);
                   bus != NULL;
                   bus = TAILQ_NEXT(bus, links)) {
                      if (bus->path_id == path_id) {
                              bus->refcount++;
                              break;
                      }
              }
              xpt_unlock_buses();
              return (bus);
      }
      
      static struct cam_et *
      xpt_find_target(struct cam_eb *bus, target_id_t        target_id)
      {
              struct cam_et *target;
      
              mtx_assert(&bus->eb_mtx, MA_OWNED);
              for (target = TAILQ_FIRST(&bus->et_entries);
                   target != NULL;
                   target = TAILQ_NEXT(target, links)) {
                      if (target->target_id == target_id) {
                              target->refcount++;
                              break;
                      }
              }
              return (target);
      }
      
      static struct cam_ed *
      xpt_find_device(struct cam_et *target, lun_id_t lun_id)
      {
              struct cam_ed *device;
      
              mtx_assert(&target->bus->eb_mtx, MA_OWNED);
              for (device = TAILQ_FIRST(&target->ed_entries);
                   device != NULL;
                   device = TAILQ_NEXT(device, links)) {
                      if (device->lun_id == lun_id) {
                              device->refcount++;
                              break;
                      }
              }
              return (device);
      }
      
      void
      xpt_start_tags(struct cam_path *path)
      {
              struct ccb_relsim crs;
              struct cam_ed *device;
              struct cam_sim *sim;
              int    newopenings;
      
              device = path->device;
              sim = path->bus->sim;
              device->flags &= ~CAM_DEV_TAG_AFTER_COUNT;
              xpt_freeze_devq(path, /*count*/1);
              device->inq_flags |= SID_CmdQue;
              if (device->tag_saved_openings != 0)
                      newopenings = device->tag_saved_openings;
              else
                      newopenings = min(device->maxtags,
                                        sim->max_tagged_dev_openings);
              xpt_dev_ccbq_resize(path, newopenings);
              xpt_async(AC_GETDEV_CHANGED, path, NULL);
              xpt_setup_ccb(&crs.ccb_h, path, CAM_PRIORITY_NORMAL);
              crs.ccb_h.func_code = XPT_REL_SIMQ;
              crs.release_flags = RELSIM_RELEASE_AFTER_QEMPTY;
              crs.openings
                  = crs.release_timeout
                  = crs.qf