/*        $OpenBSD: kern_sig.c,v 1.233 2019/08/07 14:14:01 deraadt Exp $        */
      /*        $NetBSD: kern_sig.c,v 1.54 1996/04/22 01:38:32 christos Exp $        */
      
      /*
       * Copyright (c) 1997 Theo de Raadt. All rights reserved. 
       * Copyright (c) 1982, 1986, 1989, 1991, 1993
       *        The Regents of the University of California.  All rights reserved.
       * (c) UNIX System Laboratories, Inc.
       * All or some portions of this file are derived from material licensed
       * to the University of California by American Telephone and Telegraph
       * Co. or Unix System Laboratories, Inc. and are reproduced herein with
       * the permission of UNIX System Laboratories, Inc.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       * 3. Neither the name of the University nor the names of its contributors
       *    may be used to endorse or promote products derived from this software
       *    without specific prior written permission.
       *
       * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
       * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
       * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
       * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
       * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
       * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
       * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
       * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
       * SUCH DAMAGE.
       *
       *        @(#)kern_sig.c        8.7 (Berkeley) 4/18/94
       */
      
      #define        SIGPROP                /* include signal properties table */
      #include <sys/param.h>
      #include <sys/signalvar.h>
      #include <sys/resourcevar.h>
      #include <sys/queue.h>
      #include <sys/namei.h>
      #include <sys/vnode.h>
      #include <sys/event.h>
      #include <sys/proc.h>
      #include <sys/systm.h>
      #include <sys/acct.h>
      #include <sys/fcntl.h>
      #include <sys/filedesc.h>
      #include <sys/kernel.h>
      #include <sys/wait.h>
      #include <sys/ktrace.h>
      #include <sys/stat.h>
      #include <sys/core.h>
      #include <sys/malloc.h>
      #include <sys/pool.h>
      #include <sys/ptrace.h>
      #include <sys/sched.h>
      #include <sys/user.h>
      #include <sys/syslog.h>
      #include <sys/pledge.h>
      #include <sys/witness.h>
      
      #include <sys/mount.h>
      #include <sys/syscallargs.h>
      
      #include <uvm/uvm_extern.h>
      #include <machine/tcb.h>
      
      int        filt_sigattach(struct knote *kn);
      void        filt_sigdetach(struct knote *kn);
      int        filt_signal(struct knote *kn, long hint);
      
      struct filterops sig_filtops =
              { 0, filt_sigattach, filt_sigdetach, filt_signal };
      
      void proc_stop(struct proc *p, int);
      void proc_stop_sweep(void *);
      struct timeout proc_stop_to;
      
      void postsig(struct proc *, int);
      int cansignal(struct proc *, struct process *, int);
      
      struct pool sigacts_pool;        /* memory pool for sigacts structures */
      
      void sigio_del(struct sigiolst *);
      void sigio_unlink(struct sigio_ref *, struct sigiolst *);
      struct mutex sigio_lock = MUTEX_INITIALIZER(IPL_HIGH);
      
      /*
 * Can thread p send the signal signum to process qr?
       */
      int
      cansignal(struct proc *p, struct process *qr, int signum)
      {
              struct process *pr = p->p_p;
              struct ucred *uc = p->p_ucred;
              struct ucred *quc = qr->ps_ucred;
      
              if (uc->cr_uid == 0)
                      return (1);                /* root can always signal */
      
              if (pr == qr)
                      return (1);                /* process can always signal itself */
      
              /* optimization: if the same creds then the tests below will pass */
              if (uc == quc)
                      return (1);
      
              if (signum == SIGCONT && qr->ps_session == pr->ps_session)
                      return (1);                /* SIGCONT in session */
      
              /*
               * Using kill(), only certain signals can be sent to setugid
               * child processes
               */
              if (qr->ps_flags & PS_SUGID) {
                      switch (signum) {
                      case 0:
                      case SIGKILL:
                      case SIGINT:
                      case SIGTERM:
                      case SIGALRM:
                      case SIGSTOP:
                      case SIGTTIN:
                      case SIGTTOU:
                      case SIGTSTP:
                      case SIGHUP:
                      case SIGUSR1:
                      case SIGUSR2:
                              if (uc->cr_ruid == quc->cr_ruid ||
                                  uc->cr_uid == quc->cr_ruid)
                                      return (1);
                      }
                      return (0);
              }
      
              if (uc->cr_ruid == quc->cr_ruid ||
                  uc->cr_ruid == quc->cr_svuid ||
                  uc->cr_uid == quc->cr_ruid ||
                  uc->cr_uid == quc->cr_svuid)
                      return (1);
              return (0);
      }
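
/*
 * Illustrative userland sketch (not kernel code): the checks above are
 * what make kill(2) fail with EPERM when an unprivileged process tries
 * to signal a process owned by another user.  Signal 0 performs only
 * the permission check, without delivering anything:
 *
 *	#include <errno.h>
 *	#include <signal.h>
 *	#include <stdio.h>
 *
 *	if (kill(pid, 0) == -1 && errno == EPERM)
 *		printf("no permission to signal %d\n", pid);
 */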
      
      /*
       * Initialize signal-related data structures.
       */
      void
      signal_init(void)
      {
              timeout_set(&proc_stop_to, proc_stop_sweep, NULL);
      
              pool_init(&sigacts_pool, sizeof(struct sigacts), 0, IPL_NONE,
                  PR_WAITOK, "sigapl", NULL);
      }
      
      /*
       * Create an initial sigacts structure, using the same signal state
 * as pr.
       */
      struct sigacts *
      sigactsinit(struct process *pr)
      {
              struct sigacts *ps;
      
              ps = pool_get(&sigacts_pool, PR_WAITOK);
              memcpy(ps, pr->ps_sigacts, sizeof(struct sigacts));
              ps->ps_refcnt = 1;
              return (ps);
      }
      
      /*
       * Share a sigacts structure.
       */
      struct sigacts *
      sigactsshare(struct process *pr)
      {
              struct sigacts *ps = pr->ps_sigacts;
      
              ps->ps_refcnt++;
              return ps;
      }
      
      /*
       * Initialize a new sigaltstack structure.
       */
      void
      sigstkinit(struct sigaltstack *ss)
      {
              ss->ss_flags = SS_DISABLE;
              ss->ss_size = 0;
              ss->ss_sp = 0;
      }
      
      /*
       * Make this process not share its sigacts, maintaining all
       * signal state.
       */
      void
      sigactsunshare(struct process *pr)
      {
              struct sigacts *newps;
      
              if (pr->ps_sigacts->ps_refcnt == 1)
                      return;
      
              newps = sigactsinit(pr);
              sigactsfree(pr);
              pr->ps_sigacts = newps;
      }
      
      /*
       * Release a sigacts structure.
       */
      void
      sigactsfree(struct process *pr)
      {
              struct sigacts *ps = pr->ps_sigacts;
      
              if (--ps->ps_refcnt > 0)
                      return;
      
              pr->ps_sigacts = NULL;
      
              pool_put(&sigacts_pool, ps);
      }
      
      int
      sys_sigaction(struct proc *p, void *v, register_t *retval)
      {
              struct sys_sigaction_args /* {
                      syscallarg(int) signum;
                      syscallarg(const struct sigaction *) nsa;
                      syscallarg(struct sigaction *) osa;
              } */ *uap = v;
              struct sigaction vec;
      #ifdef KTRACE
              struct sigaction ovec;
      #endif
              struct sigaction *sa;
              const struct sigaction *nsa;
              struct sigaction *osa;
              struct sigacts *ps = p->p_p->ps_sigacts;
              int signum;
              int bit, error;
      
              signum = SCARG(uap, signum);
              nsa = SCARG(uap, nsa);
              osa = SCARG(uap, osa);
      
              if (signum <= 0 || signum >= NSIG ||
                  (nsa && (signum == SIGKILL || signum == SIGSTOP)))
                      return (EINVAL);
              sa = &vec;
              if (osa) {
                      sa->sa_handler = ps->ps_sigact[signum];
                      sa->sa_mask = ps->ps_catchmask[signum];
                      bit = sigmask(signum);
                      sa->sa_flags = 0;
                      if ((ps->ps_sigonstack & bit) != 0)
                              sa->sa_flags |= SA_ONSTACK;
                      if ((ps->ps_sigintr & bit) == 0)
                              sa->sa_flags |= SA_RESTART;
                      if ((ps->ps_sigreset & bit) != 0)
                              sa->sa_flags |= SA_RESETHAND;
                      if ((ps->ps_siginfo & bit) != 0)
                              sa->sa_flags |= SA_SIGINFO;
                      if (signum == SIGCHLD) {
                              if ((ps->ps_flags & SAS_NOCLDSTOP) != 0)
                                      sa->sa_flags |= SA_NOCLDSTOP;
                              if ((ps->ps_flags & SAS_NOCLDWAIT) != 0)
                                      sa->sa_flags |= SA_NOCLDWAIT;
                      }
                      if ((sa->sa_mask & bit) == 0)
                              sa->sa_flags |= SA_NODEFER;
                      sa->sa_mask &= ~bit;
                      error = copyout(sa, osa, sizeof (vec));
                      if (error)
                              return (error);
      #ifdef KTRACE
                      if (KTRPOINT(p, KTR_STRUCT))
                              ovec = vec;
      #endif
              }
              if (nsa) {
                      error = copyin(nsa, sa, sizeof (vec));
                      if (error)
                              return (error);
      #ifdef KTRACE
                      if (KTRPOINT(p, KTR_STRUCT))
                              ktrsigaction(p, sa);
      #endif
                      setsigvec(p, signum, sa);
              }
      #ifdef KTRACE
              if (osa && KTRPOINT(p, KTR_STRUCT))
                      ktrsigaction(p, &ovec);
      #endif
              return (0);
      }
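
/*
 * Illustrative userland sketch (not kernel code): a sigaction(2) call
 * that lands here, installing a handler with an empty mask and
 * SA_RESTART so that interrupted slow syscalls are restarted:
 *
 *	#include <err.h>
 *	#include <signal.h>
 *
 *	struct sigaction sa;
 *
 *	sigemptyset(&sa.sa_mask);
 *	sa.sa_flags = SA_RESTART;
 *	sa.sa_handler = handler;
 *	if (sigaction(SIGUSR1, &sa, NULL) == -1)
 *		err(1, "sigaction");
 *
 * where handler is assumed to be a function doing only
 * async-signal-safe work.
 */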
      
      void
      setsigvec(struct proc *p, int signum, struct sigaction *sa)
      {
              struct sigacts *ps = p->p_p->ps_sigacts;
              int bit;
              int s;
      
              bit = sigmask(signum);
              /*
               * Change setting atomically.
               */
              s = splhigh();
              ps->ps_sigact[signum] = sa->sa_handler;
              if ((sa->sa_flags & SA_NODEFER) == 0)
                      sa->sa_mask |= sigmask(signum);
              ps->ps_catchmask[signum] = sa->sa_mask &~ sigcantmask;
              if (signum == SIGCHLD) {
                      if (sa->sa_flags & SA_NOCLDSTOP)
                              atomic_setbits_int(&ps->ps_flags, SAS_NOCLDSTOP);
                      else
                              atomic_clearbits_int(&ps->ps_flags, SAS_NOCLDSTOP);
                      /*
                       * If the SA_NOCLDWAIT flag is set or the handler
                       * is SIG_IGN we reparent the dying child to PID 1
                       * (init) which will reap the zombie.  Because we use
                       * init to do our dirty work we never set SAS_NOCLDWAIT
                       * for PID 1.
                       * XXX exit1 rework means this is unnecessary?
                       */
                      if (initprocess->ps_sigacts != ps &&
                          ((sa->sa_flags & SA_NOCLDWAIT) ||
                          sa->sa_handler == SIG_IGN))
                              atomic_setbits_int(&ps->ps_flags, SAS_NOCLDWAIT);
                      else
                              atomic_clearbits_int(&ps->ps_flags, SAS_NOCLDWAIT);
              }
              if ((sa->sa_flags & SA_RESETHAND) != 0)
                      ps->ps_sigreset |= bit;
              else
                      ps->ps_sigreset &= ~bit;
              if ((sa->sa_flags & SA_SIGINFO) != 0)
                      ps->ps_siginfo |= bit;
              else
                      ps->ps_siginfo &= ~bit;
              if ((sa->sa_flags & SA_RESTART) == 0)
                      ps->ps_sigintr |= bit;
              else
                      ps->ps_sigintr &= ~bit;
              if ((sa->sa_flags & SA_ONSTACK) != 0)
                      ps->ps_sigonstack |= bit;
              else
                      ps->ps_sigonstack &= ~bit;
              /*
               * Set bit in ps_sigignore for signals that are set to SIG_IGN,
               * and for signals set to SIG_DFL where the default is to ignore.
               * However, don't put SIGCONT in ps_sigignore,
               * as we have to restart the process.
               */
              if (sa->sa_handler == SIG_IGN ||
                  (sigprop[signum] & SA_IGNORE && sa->sa_handler == SIG_DFL)) {
                      atomic_clearbits_int(&p->p_siglist, bit);
                      atomic_clearbits_int(&p->p_p->ps_siglist, bit);
                      if (signum != SIGCONT)
                              ps->ps_sigignore |= bit;        /* easier in psignal */
                      ps->ps_sigcatch &= ~bit;
              } else {
                      ps->ps_sigignore &= ~bit;
                      if (sa->sa_handler == SIG_DFL)
                              ps->ps_sigcatch &= ~bit;
                      else
                              ps->ps_sigcatch |= bit;
              }
              splx(s);
      }
      
      /*
       * Initialize signal state for process 0;
       * set to ignore signals that are ignored by default.
       */
      void
      siginit(struct process *pr)
      {
              struct sigacts *ps = pr->ps_sigacts;
              int i;
      
              for (i = 0; i < NSIG; i++)
                      if (sigprop[i] & SA_IGNORE && i != SIGCONT)
                              ps->ps_sigignore |= sigmask(i);
              ps->ps_flags = SAS_NOCLDWAIT | SAS_NOCLDSTOP;
      }
      
      /*
       * Reset signals for an exec by the specified thread.
       */
      void
      execsigs(struct proc *p)
      {
              struct sigacts *ps;
              int nc, mask;
      
              sigactsunshare(p->p_p);
              ps = p->p_p->ps_sigacts;
      
              /*
               * Reset caught signals.  Held signals remain held
               * through p_sigmask (unless they were caught,
               * and are now ignored by default).
               */
              while (ps->ps_sigcatch) {
                      nc = ffs((long)ps->ps_sigcatch);
                      mask = sigmask(nc);
                      ps->ps_sigcatch &= ~mask;
                      if (sigprop[nc] & SA_IGNORE) {
                              if (nc != SIGCONT)
                                      ps->ps_sigignore |= mask;
                              atomic_clearbits_int(&p->p_siglist, mask);
                              atomic_clearbits_int(&p->p_p->ps_siglist, mask);
                      }
                      ps->ps_sigact[nc] = SIG_DFL;
              }
              /*
               * Reset stack state to the user stack.
               * Clear set of signals caught on the signal stack.
               */
              sigstkinit(&p->p_sigstk);
              ps->ps_flags &= ~SAS_NOCLDWAIT;
              if (ps->ps_sigact[SIGCHLD] == SIG_IGN)
                      ps->ps_sigact[SIGCHLD] = SIG_DFL;
      }
      
      /*
       * Manipulate signal mask.
       * Note that we receive new mask, not pointer,
       * and return old mask as return value;
       * the library stub does the rest.
       */
      int
      sys_sigprocmask(struct proc *p, void *v, register_t *retval)
      {
              struct sys_sigprocmask_args /* {
                      syscallarg(int) how;
                      syscallarg(sigset_t) mask;
              } */ *uap = v;
              int error = 0;
              sigset_t mask;
      
              *retval = p->p_sigmask;
              mask = SCARG(uap, mask) &~ sigcantmask;
      
              switch (SCARG(uap, how)) {
              case SIG_BLOCK:
                      atomic_setbits_int(&p->p_sigmask, mask);
                      break;
              case SIG_UNBLOCK:
                      atomic_clearbits_int(&p->p_sigmask, mask);
                      break;
              case SIG_SETMASK:
                      p->p_sigmask = mask;
                      break;
              default:
                      error = EINVAL;
                      break;
              }
              return (error);
      }
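
/*
 * Illustrative userland sketch (not kernel code): the libc stub hides
 * the pass-mask-by-value convention used above.  A typical caller
 * blocks a signal around a critical section and then restores the
 * previous mask:
 *
 *	#include <signal.h>
 *
 *	sigset_t new, old;
 *
 *	sigemptyset(&new);
 *	sigaddset(&new, SIGINT);
 *	sigprocmask(SIG_BLOCK, &new, &old);
 *	... critical section; a SIGINT stays pending ...
 *	sigprocmask(SIG_SETMASK, &old, NULL);
 */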
      
      int
      sys_sigpending(struct proc *p, void *v, register_t *retval)
      {
      
              *retval = p->p_siglist | p->p_p->ps_siglist;
              return (0);
      }
      
      /*
       * Temporarily replace calling proc's signal mask for the duration of a
       * system call.  Original signal mask will be restored by userret().
       */
      void
      dosigsuspend(struct proc *p, sigset_t newmask)
      {
              KASSERT(p == curproc);
      
              p->p_oldmask = p->p_sigmask;
              atomic_setbits_int(&p->p_flag, P_SIGSUSPEND);
              p->p_sigmask = newmask;
      }
      
      /*
       * Suspend process until signal, providing mask to be set
       * in the meantime.  Note nonstandard calling convention:
       * libc stub passes mask, not pointer, to save a copyin.
       */
      int
      sys_sigsuspend(struct proc *p, void *v, register_t *retval)
      {
              struct sys_sigsuspend_args /* {
                      syscallarg(int) mask;
              } */ *uap = v;
              struct process *pr = p->p_p;
              struct sigacts *ps = pr->ps_sigacts;
      
              dosigsuspend(p, SCARG(uap, mask) &~ sigcantmask);
              while (tsleep(ps, PPAUSE|PCATCH, "pause", 0) == 0)
                      /* void */;
              /* always return EINTR rather than ERESTART... */
              return (EINTR);
      }
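
/*
 * Illustrative userland sketch (not kernel code): sigsuspend(2) exists
 * to close the race between unblocking a signal and going to sleep.
 * The classic pattern, assuming got_usr1 is a volatile sig_atomic_t
 * flag set by the SIGUSR1 handler:
 *
 *	#include <signal.h>
 *
 *	sigset_t block, waitmask;
 *
 *	sigemptyset(&block);
 *	sigaddset(&block, SIGUSR1);
 *	sigprocmask(SIG_BLOCK, &block, &waitmask);
 *	sigdelset(&waitmask, SIGUSR1);
 *	while (!got_usr1)
 *		sigsuspend(&waitmask);
 *
 * sigsuspend() atomically installs waitmask and sleeps; as noted
 * above, it always returns with errno set to EINTR.
 */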
      
      int
      sigonstack(size_t stack)
      {
              const struct sigaltstack *ss = &curproc->p_sigstk;
      
              return (ss->ss_flags & SS_DISABLE ? 0 :
                  (stack - (size_t)ss->ss_sp < ss->ss_size));
      }
      
      int
      sys_sigaltstack(struct proc *p, void *v, register_t *retval)
      {
              struct sys_sigaltstack_args /* {
                      syscallarg(const struct sigaltstack *) nss;
                      syscallarg(struct sigaltstack *) oss;
              } */ *uap = v;
              struct sigaltstack ss;
              const struct sigaltstack *nss;
              struct sigaltstack *oss;
              int onstack = sigonstack(PROC_STACK(p));
              int error;
      
              nss = SCARG(uap, nss);
              oss = SCARG(uap, oss);
      
              if (oss != NULL) {
                      ss = p->p_sigstk;
                      if (onstack)
                              ss.ss_flags |= SS_ONSTACK;
                      if ((error = copyout(&ss, oss, sizeof(ss))))
                              return (error);
              }
              if (nss == NULL)
                      return (0);
              error = copyin(nss, &ss, sizeof(ss));
              if (error)
                      return (error);
              if (onstack)
                      return (EPERM);
              if (ss.ss_flags & ~SS_DISABLE)
                      return (EINVAL);
              if (ss.ss_flags & SS_DISABLE) {
                      p->p_sigstk.ss_flags = ss.ss_flags;
                      return (0);
              }
              if (ss.ss_size < MINSIGSTKSZ)
                      return (ENOMEM);
      
              error = uvm_map_remap_as_stack(p, (vaddr_t)ss.ss_sp, ss.ss_size);
              if (error)
                      return (error);
      
              p->p_sigstk = ss;
              return (0);
      }
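
/*
 * Illustrative userland sketch (not kernel code): an alternate signal
 * stack lets a SIGSEGV handler run even when the fault was caused by
 * overflowing the normal stack:
 *
 *	#include <signal.h>
 *	#include <stdlib.h>
 *
 *	struct sigaltstack ss;
 *
 *	ss.ss_sp = malloc(SIGSTKSZ);
 *	ss.ss_size = SIGSTKSZ;
 *	ss.ss_flags = 0;
 *	if (ss.ss_sp != NULL)
 *		sigaltstack(&ss, NULL);
 *
 * followed by installing the handler with SA_ONSTACK in sa_flags so
 * that delivery switches to this stack.
 */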
      
      int
      sys_kill(struct proc *cp, void *v, register_t *retval)
      {
              struct sys_kill_args /* {
                      syscallarg(int) pid;
                      syscallarg(int) signum;
              } */ *uap = v;
              struct process *pr;
              int pid = SCARG(uap, pid);
              int signum = SCARG(uap, signum);
              int error;
              int zombie = 0;
      
              if ((error = pledge_kill(cp, pid)) != 0)
                      return (error);
              if (((u_int)signum) >= NSIG)
                      return (EINVAL);
              if (pid > 0) {
                      if ((pr = prfind(pid)) == NULL) {
                              if ((pr = zombiefind(pid)) == NULL)
                                      return (ESRCH);
                              else
                                      zombie = 1;
                      }
                      if (!cansignal(cp, pr, signum))
                              return (EPERM);
      
                      /* kill single process */
                      if (signum && !zombie)
                              prsignal(pr, signum);
                      return (0);
              }
              switch (pid) {
              case -1:                /* broadcast signal */
                      return (killpg1(cp, signum, 0, 1));
              case 0:                        /* signal own process group */
                      return (killpg1(cp, signum, 0, 0));
              default:                /* negative explicit process group */
                      return (killpg1(cp, signum, -pid, 0));
              }
      }
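
/*
 * Illustrative sketch (not kernel code): the pid encodings dispatched
 * above, as seen from the kill(2) caller:
 *
 *	kill(pid, SIGTERM);	pid > 0: that single process
 *	kill(0, SIGTERM);	caller's own process group
 *	kill(-pgid, SIGTERM);	the process group pgid
 *	kill(-1, SIGTERM);	broadcast to all signalable processes
 */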
      
      int
      sys_thrkill(struct proc *cp, void *v, register_t *retval)
      {
              struct sys_thrkill_args /* {
                      syscallarg(pid_t) tid;
                      syscallarg(int) signum;
                      syscallarg(void *) tcb;
              } */ *uap = v;
              struct proc *p;
              int tid = SCARG(uap, tid);
              int signum = SCARG(uap, signum);
              void *tcb;
      
              if (((u_int)signum) >= NSIG)
                      return (EINVAL);
              if (tid > THREAD_PID_OFFSET) {
                      if ((p = tfind(tid - THREAD_PID_OFFSET)) == NULL)
                              return (ESRCH);
      
                      /* can only kill threads in the same process */
                      if (p->p_p != cp->p_p)
                              return (ESRCH);
              } else if (tid == 0)
                      p = cp;
              else
                      return (EINVAL);
      
              /* optionally require the target thread to have the given tcb addr */
              tcb = SCARG(uap, tcb);
              if (tcb != NULL && tcb != TCB_GET(p))
                      return (ESRCH);
      
              if (signum)
                      ptsignal(p, signum, STHREAD);
              return (0);
      }
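
/*
 * Illustrative userland sketch (not kernel code): thrkill(2) is rarely
 * called directly; pthread_kill(3) is the usual interface (and, to our
 * understanding, is built on thrkill in OpenBSD, with the tcb argument
 * guarding against tid reuse):
 *
 *	#include <pthread.h>
 *	#include <signal.h>
 *
 *	pthread_kill(thread, SIGUSR1);
 */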
      
      /*
       * Common code for kill process group/broadcast kill.
       * cp is calling process.
       */
      int
      killpg1(struct proc *cp, int signum, int pgid, int all)
      {
              struct process *pr;
              struct pgrp *pgrp;
              int nfound = 0;
      
              if (all) {
                      /* 
                       * broadcast
                       */
                      LIST_FOREACH(pr, &allprocess, ps_list) {
                              if (pr->ps_pid <= 1 ||
                                  pr->ps_flags & (PS_SYSTEM | PS_NOBROADCASTKILL) ||
                                  pr == cp->p_p || !cansignal(cp, pr, signum))
                                      continue;
                              nfound++;
                              if (signum)
                                      prsignal(pr, signum);
                      }
              } else {
                      if (pgid == 0)
                              /*
                               * zero pgid means send to my process group.
                               */
                              pgrp = cp->p_p->ps_pgrp;
                      else {
                              pgrp = pgfind(pgid);
                              if (pgrp == NULL)
                                      return (ESRCH);
                      }
                      LIST_FOREACH(pr, &pgrp->pg_members, ps_pglist) {
                              if (pr->ps_pid <= 1 || pr->ps_flags & PS_SYSTEM ||
                                  !cansignal(cp, pr, signum))
                                      continue;
                              nfound++;
                              if (signum)
                                      prsignal(pr, signum);
                      }
              }
              return (nfound ? 0 : ESRCH);
      }
      
      #define CANDELIVER(uid, euid, pr) \
	((euid) == 0 || \
              (uid) == (pr)->ps_ucred->cr_ruid || \
              (uid) == (pr)->ps_ucred->cr_svuid || \
              (uid) == (pr)->ps_ucred->cr_uid || \
              (euid) == (pr)->ps_ucred->cr_ruid || \
              (euid) == (pr)->ps_ucred->cr_svuid || \
              (euid) == (pr)->ps_ucred->cr_uid)
      
      #define CANSIGIO(cr, pr) \
              CANDELIVER((cr)->cr_ruid, (cr)->cr_uid, (pr))
      
      /*
       * Deliver signum to pgid, but first check uid/euid against each
       * process and see if it is permitted.
       */
      void
      csignal(pid_t pgid, int signum, uid_t uid, uid_t euid)
      {
              struct pgrp *pgrp;
              struct process *pr;
      
              if (pgid == 0)
                      return;
              if (pgid < 0) {
                      pgid = -pgid;
                      if ((pgrp = pgfind(pgid)) == NULL)
                              return;
                      LIST_FOREACH(pr, &pgrp->pg_members, ps_pglist)
                              if (CANDELIVER(uid, euid, pr))
                                      prsignal(pr, signum);
              } else {
                      if ((pr = prfind(pgid)) == NULL)
                              return;
                      if (CANDELIVER(uid, euid, pr))
                              prsignal(pr, signum);
              }
      }
      
      /*
 * Send a signal to a process group.  If checkctty is 1,
       * limit to members which have a controlling terminal.
       */
      void
      pgsignal(struct pgrp *pgrp, int signum, int checkctty)
{
              struct process *pr;
      
	if (pgrp)
                      LIST_FOREACH(pr, &pgrp->pg_members, ps_pglist)
                              if (checkctty == 0 || pr->ps_flags & PS_CONTROLT)
                                      prsignal(pr, signum);
      }
      
      /*
       * Send a SIGIO or SIGURG signal to a process or process group using stored
       * credentials rather than those of the current process.
       */
      void
      pgsigio(struct sigio_ref *sir, int sig, int checkctty)
{
              struct process *pr;
              struct sigio *sigio;
      
	if (sir->sir_sigio == NULL)
                      return;
      
              mtx_enter(&sigio_lock);
              sigio = sir->sir_sigio;
              if (sigio == NULL)
                      goto out;
              if (sigio->sio_pgid > 0) {
		if (CANSIGIO(sigio->sio_ucred, sigio->sio_proc))
                              prsignal(sigio->sio_proc, sig);
              } else if (sigio->sio_pgid < 0) {
                      LIST_FOREACH(pr, &sigio->sio_pgrp->pg_members, ps_pglist) {
                              if (CANSIGIO(sigio->sio_ucred, pr) &&
                                  (checkctty == 0 || (pr->ps_flags & PS_CONTROLT)))
                                      prsignal(pr, sig);
                      }
              }
      out:
              mtx_leave(&sigio_lock);
      }
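
/*
 * Illustrative userland sketch (not kernel code): the stored sigio
 * credentials used above come from a setup along these lines, after
 * which the descriptor delivers SIGIO asynchronously:
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int fl;
 *
 *	fcntl(fd, F_SETOWN, getpid());
 *	fl = fcntl(fd, F_GETFL);
 *	fcntl(fd, F_SETFL, fl | O_ASYNC);
 */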
      
      /*
       * Recalculate the signal mask and reset the signal disposition after
       * usermode frame for delivery is formed.
       */
      void
      postsig_done(struct proc *p, int signum, struct sigacts *ps)
      {
              int mask = sigmask(signum);
      
              KERNEL_ASSERT_LOCKED();
      
              p->p_ru.ru_nsignals++;
              atomic_setbits_int(&p->p_sigmask, ps->ps_catchmask[signum]);
              if ((ps->ps_sigreset & mask) != 0) {
                      ps->ps_sigcatch &= ~mask;
                      if (signum != SIGCONT && sigprop[signum] & SA_IGNORE)
                              ps->ps_sigignore |= mask;
                      ps->ps_sigact[signum] = SIG_DFL;
              }
      }
      
      /*
 * Send a signal caused by a trap to the current thread.
       * If it will be caught immediately, deliver it with correct code.
       * Otherwise, post it normally.
       */
      void
      trapsignal(struct proc *p, int signum, u_long trapno, int code,
          union sigval sigval)
      {
              struct process *pr = p->p_p;
              struct sigacts *ps = pr->ps_sigacts;
              int mask;
      
              switch (signum) {
              case SIGILL:
              case SIGBUS:
              case SIGSEGV:
                      pr->ps_acflag |= ATRAP;
                      break;
              }
      
              mask = sigmask(signum);
              if ((pr->ps_flags & PS_TRACED) == 0 &&
                  (ps->ps_sigcatch & mask) != 0 &&
                  (p->p_sigmask & mask) == 0) {
                      siginfo_t si;
                      initsiginfo(&si, signum, trapno, code, sigval);
      #ifdef KTRACE
                      if (KTRPOINT(p, KTR_PSIG)) {
                              ktrpsig(p, signum, ps->ps_sigact[signum],
                                  p->p_sigmask, code, &si);
                      }
      #endif
                      sendsig(ps->ps_sigact[signum], signum, p->p_sigmask, &si);
                      postsig_done(p, signum, ps);
              } else {
                      p->p_sisig = signum;
                      p->p_sitrapno = trapno;        /* XXX for core dump/debugger */
                      p->p_sicode = code;
                      p->p_sigval = sigval;
      
                      /*
                       * Signals like SIGBUS and SIGSEGV should not, when
                       * generated by the kernel, be ignorable or blockable.
                       * If it is and we're not being traced, then just kill
                       * the process.
                       */
                      if ((pr->ps_flags & PS_TRACED) == 0 &&
                          (sigprop[signum] & SA_KILL) &&
                          ((p->p_sigmask & mask) || (ps->ps_sigignore & mask)))
                              sigexit(p, signum);
                      ptsignal(p, signum, STHREAD);
              }
      }
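
/*
 * Illustrative userland sketch (not kernel code): a handler installed
 * with SA_SIGINFO receives the siginfo_t built above and can inspect
 * the faulting address of a caught SIGSEGV:
 *
 *	#include <signal.h>
 *
 *	static void
 *	segv(int signo, siginfo_t *si, void *ctx)
 *	{
 *		... look at si->si_addr, then recover or _exit() ...
 *	}
 *
 *	struct sigaction sa;
 *
 *	sigemptyset(&sa.sa_mask);
 *	sa.sa_flags = SA_SIGINFO;
 *	sa.sa_sigaction = segv;
 *	sigaction(SIGSEGV, &sa, NULL);
 */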
      
      /*
       * Send the signal to the process.  If the signal has an action, the action
       * is usually performed by the target process rather than the caller; we add
       * the signal to the set of pending signals for the process.
       *
       * Exceptions:
       *   o When a stop signal is sent to a sleeping process that takes the
       *     default action, the process is stopped without awakening it.
       *   o SIGCONT restarts stopped processes (or puts them back to sleep)
 *     regardless of the signal action (e.g., blocked or ignored).
       *
       * Other ignored signals are discarded immediately.
       */
      void
      psignal(struct proc *p, int signum)
      {
              ptsignal(p, signum, SPROCESS);
      }
      
      /*
       * type = SPROCESS        process signal, can be diverted (sigwait())
       * type = STHREAD        thread signal, but should be propagated if unhandled
       * type = SPROPAGATED        propagated to this thread, so don't propagate again
       */
      void
      ptsignal(struct proc *p, int signum, enum signal_type type)
{
              int s, prop;
              sig_t action;
              int mask;
              int *siglist;
              struct process *pr = p->p_p;
              struct proc *q;
              int wakeparent = 0;
      
              KERNEL_ASSERT_LOCKED();
      
      #ifdef DIAGNOSTIC
              if ((u_int)signum >= NSIG || signum == 0)
                      panic("psignal signal number");
      #endif
      
              /* Ignore signal if the target process is exiting */
              if (pr->ps_flags & PS_EXITING)
                      return;
      
              mask = sigmask(signum);
      
              if (type == SPROCESS) {
                      /* Accept SIGKILL to coredumping processes */
                      if (pr->ps_flags & PS_COREDUMP && signum == SIGKILL) {
                              if (pr->ps_single != NULL)
                                      p = pr->ps_single;
                              atomic_setbits_int(&p->p_p->ps_siglist, mask);
                              return;
                      }
      
                      /*
                       * If the current thread can process the signal
                       * immediately (it's unblocked) then have it take it.
                       */
                      q = curproc;
		if (q != NULL && q->p_p == pr && (q->p_flag & P_WEXIT) == 0 &&
                          (q->p_sigmask & mask) == 0)
                              p = q;
                      else {
                              /*
                               * A process-wide signal can be diverted to a
                               * different thread that's in sigwait() for this
                               * signal.  If there isn't such a thread, then
                               * pick a thread that doesn't have it blocked so
                               * that the stop/kill consideration isn't
                               * delayed.  Otherwise, mark it pending on the
                               * main thread.
                               */
			TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link) {
                                      /* ignore exiting threads */
                                      if (q->p_flag & P_WEXIT)
                                              continue;
      
                                      /* skip threads that have the signal blocked */
                                      if ((q->p_sigmask & mask) != 0)
                                              continue;
      
                                      /* okay, could send to this thread */
                                      p = q;
      
                                      /*
                                       * sigsuspend, sigwait, ppoll/pselect, etc?
                                       * Definitely go to this thread, as it's
                                       * already blocked in the kernel.
                                       */
				if (q->p_flag & P_SIGSUSPEND)
                                              break;
                              }
                      }
              }
      
	if (type != SPROPAGATED)
                      KNOTE(&pr->ps_klist, NOTE_SIGNAL | signum);
      
              prop = sigprop[signum];
      
              /*
               * If proc is traced, always give parent a chance.
               */
              if (pr->ps_flags & PS_TRACED) {
                      action = SIG_DFL;
              } else {
                      /*
                       * If the signal is being ignored,
                       * then we forget about it immediately.
                       * (Note: we don't set SIGCONT in ps_sigignore,
                       * and if it is set to SIG_IGN,
                       * action will be SIG_DFL here.)
                       */
		if (pr->ps_sigacts->ps_sigignore & mask)
                              return;
                      if (p->p_sigmask & mask) {
                              action = SIG_HOLD;
                      } else if (pr->ps_sigacts->ps_sigcatch & mask) {
                              action = SIG_CATCH;
                      } else {
                              action = SIG_DFL;
      
                              if (prop & SA_KILL && pr->ps_nice > NZERO)
                                       pr->ps_nice = NZERO;
      
                              /*
                               * If sending a tty stop signal to a member of an
                               * orphaned process group, discard the signal here if
                               * the action is default; don't stop the process below
                               * if sleeping, and don't clear any pending SIGCONT.
                               */
                              if (prop & SA_TTYSTOP && pr->ps_pgrp->pg_jobc == 0)
                                      return;
                      }
              }
              /*
               * If delivered to process, mark as pending there.  Continue and stop
               * signals will be propagated to all threads.  So they are always
               * marked at thread level.
               */
              siglist = (type == SPROCESS) ? &pr->ps_siglist : &p->p_siglist;
              if (prop & SA_CONT) {
                      siglist = &p->p_siglist;
                      atomic_clearbits_int(siglist, stopsigmask);
              }
              if (prop & SA_STOP) {
                      siglist = &p->p_siglist;
                      atomic_clearbits_int(siglist, contsigmask);
                      atomic_clearbits_int(&p->p_flag, P_CONTINUED);
              }
              atomic_setbits_int(siglist, mask);
      
              /*
               * XXX delay processing of SA_STOP signals unless action == SIG_DFL?
               */
              if (prop & (SA_CONT | SA_STOP) && type != SPROPAGATED)
                      TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link)
                              if (q != p)
                                      ptsignal(q, signum, SPROPAGATED);
      
              /*
               * Defer further processing for signals which are held,
               * except that stopped processes must be continued by SIGCONT.
               */
              if (action == SIG_HOLD && ((prop & SA_CONT) == 0 || p->p_stat != SSTOP))
                      return;
      
              SCHED_LOCK(s);
      
              switch (p->p_stat) {
      
              case SSLEEP:
                      /*
                       * If process is sleeping uninterruptibly
                       * we can't interrupt the sleep... the signal will
                       * be noticed when the process returns through
                       * trap() or syscall().
                       */
                      if ((p->p_flag & P_SINTR) == 0)
                              goto out;
                      /*
                       * Process is sleeping and traced... make it runnable
                       * so it can discover the signal in issignal() and stop
                       * for the parent.
                       */
                      if (pr->ps_flags & PS_TRACED)
                              goto run;
                      /*
                       * If SIGCONT is default (or ignored) and process is
                       * asleep, we are finished; the process should not
                       * be awakened.
                       */
                      if ((prop & SA_CONT) && action == SIG_DFL) {
                              atomic_clearbits_int(siglist, mask);
                              goto out;
                      }
                      /*
                       * When a sleeping process receives a stop
                       * signal, process immediately if possible.
                       */
                      if ((prop & SA_STOP) && action == SIG_DFL) {
                              /*
                               * If a child holding parent blocked,
                               * stopping could cause deadlock.
                               */
                              if (pr->ps_flags & PS_PPWAIT)
                                      goto out;
                              atomic_clearbits_int(siglist, mask);
                              p->p_xstat = signum;
                              proc_stop(p, 0);
                              goto out;
                      }
                      /*
                       * All other (caught or default) signals
                       * cause the process to run.
                       */
                      goto runfast;
                      /*NOTREACHED*/
      
              case SSTOP:
                      /*
                       * If traced process is already stopped,
                       * then no further action is necessary.
                       */
                      if (pr->ps_flags & PS_TRACED)
                              goto out;
      
                      /*
                       * Kill signal always sets processes running.
                       */
                      if (signum == SIGKILL) {
                              atomic_clearbits_int(&p->p_flag, P_SUSPSIG);
                              goto runfast;
                      }
      
                      if (prop & SA_CONT) {
                              /*
                               * If SIGCONT is default (or ignored), we continue the
                               * process but don't leave the signal in p_siglist, as
                               * it has no further action.  If SIGCONT is held, we
                               * continue the process and leave the signal in
                               * p_siglist.  If the process catches SIGCONT, let it
                               * handle the signal itself.  If it isn't waiting on
                               * an event, then it goes back to run state.
                               * Otherwise, process goes back to sleep state.
                               */
                              atomic_setbits_int(&p->p_flag, P_CONTINUED);
                              atomic_clearbits_int(&p->p_flag, P_SUSPSIG);
                              wakeparent = 1;
                              if (action == SIG_DFL)
                                      atomic_clearbits_int(siglist, mask);
                              if (action == SIG_CATCH)
                                      goto runfast;
                              if (p->p_wchan == 0)
                                      goto run;
                              p->p_stat = SSLEEP;
                              goto out;
                      }
      
                      if (prop & SA_STOP) {
                              /*
                               * Already stopped, don't need to stop again.
                               * (If we did the shell could get confused.)
                               */
                              atomic_clearbits_int(siglist, mask);
                              goto out;
                      }
      
                      /*
                       * If process is sleeping interruptibly, then simulate a
                       * wakeup so that when it is continued, it will be made
                       * runnable and can look at the signal.  But don't make
                       * the process runnable, leave it stopped.
                       */
                      if (p->p_wchan && p->p_flag & P_SINTR)
                              unsleep(p);
                      goto out;
      
              case SONPROC:
                      signotify(p);
                      /* FALLTHROUGH */
              default:
                      /*
                       * SRUN, SIDL, SDEAD do nothing with the signal,
                       * other than kicking ourselves if we are running.
                       * It will either never be noticed, or noticed very soon.
                       */
                      goto out;
              }
              /*NOTREACHED*/
      
      runfast:
              /*
               * Raise priority to at least PUSER.
               */
              if (p->p_priority > PUSER)
                      p->p_priority = PUSER;
      run:
              setrunnable(p);
      out:
              SCHED_UNLOCK(s);
              if (wakeparent)
                      wakeup(pr->ps_pptr);
      }
      
      /*
       * If the current process has received a signal (should be caught or cause
       * termination, should interrupt current syscall), return the signal number.
       * Stop signals with default action are processed immediately, then cleared;
       * they aren't returned.  This is checked after each entry to the system for
       * a syscall or trap (though this can usually be done without calling issignal
       * by checking the pending signal masks in the CURSIG macro.) The normal call
       * sequence is
       *
 *	while (signum = CURSIG(curproc))
 *		postsig(curproc, signum);
       *
       * Assumes that if the P_SINTR flag is set, we're holding both the
       * kernel and scheduler locks.
       */
      int
      issignal(struct proc *p)
      {
              struct process *pr = p->p_p;
              int signum, mask, prop;
              int dolock = (p->p_flag & P_SINTR) == 0;
              int s;
      
              for (;;) {
                      mask = SIGPENDING(p);
                      if (pr->ps_flags & PS_PPWAIT)
                              mask &= ~stopsigmask;
                      if (mask == 0)                 /* no signal to send */
                              return (0);
                      signum = ffs((long)mask);
                      mask = sigmask(signum);
                      atomic_clearbits_int(&p->p_siglist, mask);
                      atomic_clearbits_int(&p->p_p->ps_siglist, mask);
      
                      /*
                       * We should see pending but ignored signals
                       * only if PS_TRACED was on when they were posted.
                       */
                      if (mask & pr->ps_sigacts->ps_sigignore &&
                          (pr->ps_flags & PS_TRACED) == 0)
                              continue;
      
                      /*
                       * If traced, always stop, and stay stopped until released
                       * by the debugger.  If our parent process is waiting for
                       * us, don't hang as we could deadlock.
                       */
                      if (((pr->ps_flags & (PS_TRACED | PS_PPWAIT)) == PS_TRACED) &&
                          signum != SIGKILL) {
                              p->p_xstat = signum;
      
                              if (dolock)
                                      KERNEL_LOCK();
                              single_thread_set(p, SINGLE_PTRACE, 0);
                              if (dolock)
                                      KERNEL_UNLOCK();
      
                              if (dolock)
                                      SCHED_LOCK(s);
                              proc_stop(p, 1);
                              if (dolock)
                                      SCHED_UNLOCK(s);
      
                              if (dolock)
                                      KERNEL_LOCK();
                              single_thread_clear(p, 0);
                              if (dolock)
                                      KERNEL_UNLOCK();
      
                              /*
                               * If we are no longer being traced, or the parent
                               * didn't give us a signal, look for more signals.
                               */
                              if ((pr->ps_flags & PS_TRACED) == 0 || p->p_xstat == 0)
                                      continue;
      
                              /*
                               * If the new signal is being masked, look for other
                               * signals.
                               */
                              signum = p->p_xstat;
                              mask = sigmask(signum);
                              if ((p->p_sigmask & mask) != 0)
                                      continue;
      
                              /* take the signal! */
                              atomic_clearbits_int(&p->p_siglist, mask);
                              atomic_clearbits_int(&p->p_p->ps_siglist, mask);
                      }
      
                      prop = sigprop[signum];
      
                      /*
                       * Decide whether the signal should be returned.
                       * Return the signal's number, or fall through
                       * to clear it from the pending mask.
                       */
                      switch ((long)pr->ps_sigacts->ps_sigact[signum]) {
                      case (long)SIG_DFL:
                              /*
                               * Don't take default actions on system processes.
                               */
                              if (pr->ps_pid <= 1) {
      #ifdef DIAGNOSTIC
                                      /*
                                       * Are you sure you want to ignore SIGSEGV
                                       * in init? XXX
                                       */
                                      printf("Process (pid %d) got signal"
                                          " %d\n", pr->ps_pid, signum);
      #endif
                                      break;                /* == ignore */
                              }
                              /*
                               * If there is a pending stop signal to process
                               * with default action, stop here,
                               * then clear the signal.  However,
                               * if process is member of an orphaned
                               * process group, ignore tty stop signals.
                               */
                              if (prop & SA_STOP) {
                                      if (pr->ps_flags & PS_TRACED ||
                                              (pr->ps_pgrp->pg_jobc == 0 &&
                                          prop & SA_TTYSTOP))
                                              break;        /* == ignore */
                                      p->p_xstat = signum;
                                      if (dolock)
                                              SCHED_LOCK(s);
                                      proc_stop(p, 1);
                                      if (dolock)
                                              SCHED_UNLOCK(s);
                                      break;
                              } else if (prop & SA_IGNORE) {
                                      /*
                                       * Except for SIGCONT, shouldn't get here.
                                       * Default action is to ignore; drop it.
                                       */
                                      break;                /* == ignore */
                              } else
                                      goto keep;
                              /*NOTREACHED*/
                      case (long)SIG_IGN:
                              /*
                               * Masking above should prevent us ever trying
                               * to take action on an ignored signal other
                               * than SIGCONT, unless process is traced.
                               */
                              if ((prop & SA_CONT) == 0 &&
                                  (pr->ps_flags & PS_TRACED) == 0)
                                      printf("issignal\n");
                              break;                /* == ignore */
                      default:
                              /*
                               * This signal has an action, let
                               * postsig() process it.
                               */
                              goto keep;
                      }
              }
              /* NOTREACHED */
      
      keep:
	atomic_setbits_int(&p->p_siglist, mask); /* leave the signal for later */
              return (signum);
      }
      
      /*
       * Put the argument process into the stopped state and notify the parent
       * via wakeup.  Signals are handled elsewhere.  The process must not be
       * on the run queue.
       */
      void
      proc_stop(struct proc *p, int sw)
      {
              struct process *pr = p->p_p;
              extern void *softclock_si;
      
      #ifdef MULTIPROCESSOR
              SCHED_ASSERT_LOCKED();
      #endif
      
              p->p_stat = SSTOP;
              atomic_clearbits_int(&pr->ps_flags, PS_WAITED);
              atomic_setbits_int(&pr->ps_flags, PS_STOPPED);
              atomic_setbits_int(&p->p_flag, P_SUSPSIG);
              if (!timeout_pending(&proc_stop_to)) {
                      timeout_add(&proc_stop_to, 0);
                      /*
                       * We need this soft interrupt to be handled fast.
                       * Extra calls to softclock don't hurt.
                       */
                      softintr_schedule(softclock_si);
              }
              if (sw)
                      mi_switch();
      }
      
      /*
       * Called from a timeout to send signals to the parents of stopped processes.
       * We can't do this in proc_stop because it's called with nasty locks held
       * and we would need recursive scheduler lock to deal with that.
       */
      void
      proc_stop_sweep(void *v)
      {
              struct process *pr;
      
              LIST_FOREACH(pr, &allprocess, ps_list) {
                      if ((pr->ps_flags & PS_STOPPED) == 0)
                              continue;
                      atomic_clearbits_int(&pr->ps_flags, PS_STOPPED);
      
                      if ((pr->ps_pptr->ps_sigacts->ps_flags & SAS_NOCLDSTOP) == 0)
                              prsignal(pr->ps_pptr, SIGCHLD);
                      wakeup(pr->ps_pptr);
              }
      }
      
      /*
       * Take the action for the specified signal
       * from the current set of pending signals.
       */
      void
      postsig(struct proc *p, int signum)
      {
              struct process *pr = p->p_p;
              struct sigacts *ps = pr->ps_sigacts;
              sig_t action;
              u_long trapno;
              int mask, returnmask;
              siginfo_t si;
              union sigval sigval;
              int s, code;
      
              KASSERT(signum != 0);
              KERNEL_ASSERT_LOCKED();
      
              mask = sigmask(signum);
              atomic_clearbits_int(&p->p_siglist, mask);
              action = ps->ps_sigact[signum];
              sigval.sival_ptr = 0;
      
              if (p->p_sisig != signum) {
                      trapno = 0;
                      code = SI_USER;
                      sigval.sival_ptr = 0;
              } else {
                      trapno = p->p_sitrapno;
                      code = p->p_sicode;
                      sigval = p->p_sigval;
              }
              initsiginfo(&si, signum, trapno, code, sigval);
      
      #ifdef KTRACE
              if (KTRPOINT(p, KTR_PSIG)) {
                      ktrpsig(p, signum, action, p->p_flag & P_SIGSUSPEND ?
                          p->p_oldmask : p->p_sigmask, code, &si);
              }
      #endif
              if (action == SIG_DFL) {
                      /*
                       * Default action, where the default is to kill
                       * the process.  (Other cases were ignored above.)
                       */
                      sigexit(p, signum);
                      /* NOTREACHED */
              } else {
                      /*
                       * If we get here, the signal must be caught.
                       */
      #ifdef DIAGNOSTIC
                      if (action == SIG_IGN || (p->p_sigmask & mask))
                              panic("postsig action");
      #endif
                      /*
                       * Set the new mask value and also defer further
                       * occurrences of this signal.
                       *
                       * Special case: user has done a sigpause.  Here the
                       * current mask is not of interest, but rather the
                       * mask from before the sigpause is what we want
                       * restored after the signal processing is completed.
                       */
      #ifdef MULTIPROCESSOR
                      s = splsched();
      #else
                      s = splhigh();
      #endif
                      if (p->p_flag & P_SIGSUSPEND) {
                              atomic_clearbits_int(&p->p_flag, P_SIGSUSPEND);
                              returnmask = p->p_oldmask;
                      } else {
                              returnmask = p->p_sigmask;
                      }
                      if (p->p_sisig == signum) {
                              p->p_sisig = 0;
                              p->p_sitrapno = 0;
                              p->p_sicode = SI_USER;
                              p->p_sigval.sival_ptr = NULL;
                      }
      
                      sendsig(action, signum, returnmask, &si);
                      postsig_done(p, signum, ps);
                      splx(s);
              }
      }
      
      /*
       * Force the current process to exit with the specified signal, dumping core
       * if appropriate.  We bypass the normal tests for masked and caught signals,
       * allowing unrecoverable failures to terminate the process without changing
       * signal state.  Mark the accounting record with the signal termination.
       * If dumping core, save the signal number for the debugger.  Calls exit and
       * does not return.
       */
      void
      sigexit(struct proc *p, int signum)
      {
              /* Mark process as going away */
              atomic_setbits_int(&p->p_flag, P_WEXIT);
      
              p->p_p->ps_acflag |= AXSIG;
              if (sigprop[signum] & SA_CORE) {
                      p->p_sisig = signum;
      
                      /* if there are other threads, pause them */
                      if (P_HASSIBLING(p))
                              single_thread_set(p, SINGLE_SUSPEND, 0);
      
                      if (coredump(p) == 0)
                              signum |= WCOREFLAG;
              }
              exit1(p, W_EXITCODE(0, signum), EXIT_NORMAL);
              /* NOTREACHED */
      }
      
      int nosuidcoredump = 1;
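/*
 * The nosuidcoredump knob above selects the placement policy that
 * coredump() below applies to processes with inconsistent uids: with
 * the default of 1 such dumps are refused; 2 writes
 * /var/crash/<progname>.core as root; 3 writes
 * /var/crash/<progname>/<pid>.core.
 */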
      
      struct coredump_iostate {
              struct proc *io_proc;
              struct vnode *io_vp;
              struct ucred *io_cred;
              off_t io_offset;
      };
      
      /*
 * Dump core into a file named "progname.core", unless the process was
       * setuid/setgid.
       */
      int
      coredump(struct proc *p)
      {
      #ifdef SMALL_KERNEL
              return EPERM;
      #else
              struct process *pr = p->p_p;
              struct vnode *vp;
              struct ucred *cred = p->p_ucred;
              struct vmspace *vm = p->p_vmspace;
              struct nameidata nd;
              struct vattr vattr;
              struct coredump_iostate        io;
              int error, len, incrash = 0;
              char *name;
              const char *dir = "/var/crash";
      
              if (pr->ps_emul->e_coredump == NULL)
                      return (EINVAL);
      
              pr->ps_flags |= PS_COREDUMP;
      
              /*
               * If the process has inconsistent uids, nosuidcoredump
               * determines coredump placement policy.
               */
              if (((pr->ps_flags & PS_SUGID) && (error = suser(p))) ||
                 ((pr->ps_flags & PS_SUGID) && nosuidcoredump)) {
                      if (nosuidcoredump == 3 || nosuidcoredump == 2)
                              incrash = 1;
                      else
                              return (EPERM);
              }
      
	/* Don't dump if it will exceed the file size limit. */
              if (USPACE + ptoa(vm->vm_dsize + vm->vm_ssize) >= lim_cur(RLIMIT_CORE))
                      return (EFBIG);
      
              name = pool_get(&namei_pool, PR_WAITOK);
      
              if (incrash && nosuidcoredump == 3) {
                      /*
                       * If the program directory does not exist, dumps of
                       * that core will silently fail.
                       */
                      len = snprintf(name, MAXPATHLEN, "%s/%s/%u.core",
                          dir, pr->ps_comm, pr->ps_pid);
              } else if (incrash && nosuidcoredump == 2)
                      len = snprintf(name, MAXPATHLEN, "%s/%s.core",
                          dir, pr->ps_comm);
              else
                      len = snprintf(name, MAXPATHLEN, "%s.core", pr->ps_comm);
              if (len >= MAXPATHLEN) {
                      pool_put(&namei_pool, name);
                      return (EACCES);
              }
      
              /*
               * Control the UID used to write out.  The normal case uses
               * the real UID.  If the sugid case is going to write into the
               * controlled directory, we do so as root.
               */
              if (incrash == 0) {
                      cred = crdup(cred);
                      cred->cr_uid = cred->cr_ruid;
                      cred->cr_gid = cred->cr_rgid;
              } else {
                      if (p->p_fd->fd_rdir) {
                              vrele(p->p_fd->fd_rdir);
                              p->p_fd->fd_rdir = NULL;
                      }
                      p->p_ucred = crdup(p->p_ucred);
                      crfree(cred);
                      cred = p->p_ucred;
                      crhold(cred);
                      cred->cr_uid = 0;
                      cred->cr_gid = 0;
              }
      
              NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, name, p);
      
              error = vn_open(&nd, O_CREAT | FWRITE | O_NOFOLLOW | O_NONBLOCK,
                  S_IRUSR | S_IWUSR);
      
              if (error)
                      goto out;
      
              /*
               * Don't dump to non-regular files, files with links, or files
               * owned by someone else.
               */
              vp = nd.ni_vp;
              if ((error = VOP_GETATTR(vp, &vattr, cred, p)) != 0) {
                      VOP_UNLOCK(vp);
                      vn_close(vp, FWRITE, cred, p);
                      goto out;
              }
              if (vp->v_type != VREG || vattr.va_nlink != 1 ||
                  vattr.va_mode & ((VREAD | VWRITE) >> 3 | (VREAD | VWRITE) >> 6) ||
                  vattr.va_uid != cred->cr_uid) {
                      error = EACCES;
                      VOP_UNLOCK(vp);
                      vn_close(vp, FWRITE, cred, p);
                      goto out;
              }
              VATTR_NULL(&vattr);
              vattr.va_size = 0;
              VOP_SETATTR(vp, &vattr, cred, p);
              pr->ps_acflag |= ACORE;
      
              io.io_proc = p;
              io.io_vp = vp;
              io.io_cred = cred;
              io.io_offset = 0;
              VOP_UNLOCK(vp);
              vref(vp);
              error = vn_close(vp, FWRITE, cred, p);
              if (error == 0)
                      error = (*pr->ps_emul->e_coredump)(p, &io);
              vrele(vp);
      out:
              crfree(cred);
              pool_put(&namei_pool, name);
              return (error);
      #endif
      }
      
      #ifndef SMALL_KERNEL
      int
      coredump_write(void *cookie, enum uio_seg segflg, const void *data, size_t len)
      {
              struct coredump_iostate *io = cookie;
              off_t coffset = 0;
              size_t csize;
              int chunk, error;
      
              csize = len;
              do {
                      if (sigmask(SIGKILL) &
                          (io->io_proc->p_siglist | io->io_proc->p_p->ps_siglist))
                              return (EINTR);
      
                      /* Rest of the loop sleeps with lock held, so... */
                      yield();
      
                      chunk = MIN(csize, MAXPHYS);
                      error = vn_rdwr(UIO_WRITE, io->io_vp,
                          (caddr_t)data + coffset, chunk,
                          io->io_offset + coffset, segflg,
                          IO_UNIT, io->io_cred, NULL, io->io_proc);
                      if (error) {
                              struct process *pr = io->io_proc->p_p;
      
                              if (error == ENOSPC)
                                      log(LOG_ERR,
                                          "coredump of %s(%d) failed, filesystem full\n",
                                          pr->ps_comm, pr->ps_pid);
                              else
                                      log(LOG_ERR,
                                          "coredump of %s(%d), write failed: errno %d\n",
                                          pr->ps_comm, pr->ps_pid, error);
                              return (error);
                      }
      
                      coffset += chunk;
                      csize -= chunk;
              } while (csize > 0);
      
              io->io_offset += len;
              return (0);
      }
      
      void
      coredump_unmap(void *cookie, vaddr_t start, vaddr_t end)
      {
              struct coredump_iostate *io = cookie;
      
              uvm_unmap(&io->io_proc->p_vmspace->vm_map, start, end);
      }
      
      #endif        /* !SMALL_KERNEL */
      
      /*
       * Nonexistent system call-- signal process (may want to handle it).
       * Flag error in case process won't see signal immediately (blocked or ignored).
       */
      int
      sys_nosys(struct proc *p, void *v, register_t *retval)
      {
      
              ptsignal(p, SIGSYS, STHREAD);
              return (ENOSYS);
      }
      
      int
      sys___thrsigdivert(struct proc *p, void *v, register_t *retval)
      {
              static int sigwaitsleep;
              struct sys___thrsigdivert_args /* {
                      syscallarg(sigset_t) sigmask;
                      syscallarg(siginfo_t *) info;
                      syscallarg(const struct timespec *) timeout;
              } */ *uap = v;
              struct process *pr = p->p_p;
              sigset_t *m;
              sigset_t mask = SCARG(uap, sigmask) &~ sigcantmask;
              siginfo_t si;
              uint64_t to_ticks = 0;
              int timeinvalid = 0;
              int error = 0;
      
              memset(&si, 0, sizeof(si));
      
              if (SCARG(uap, timeout) != NULL) {
                      struct timespec ts;
                      if ((error = copyin(SCARG(uap, timeout), &ts, sizeof(ts))) != 0)
                              return (error);
      #ifdef KTRACE
                      if (KTRPOINT(p, KTR_STRUCT))
                              ktrreltimespec(p, &ts);
      #endif
                      if (!timespecisvalid(&ts))
                              timeinvalid = 1;
                      else {
                              to_ticks = (uint64_t)hz * ts.tv_sec +
                                  ts.tv_nsec / (tick * 1000);
                              if (to_ticks > INT_MAX)
                                      to_ticks = INT_MAX;
                              if (to_ticks == 0 && ts.tv_nsec)
                                      to_ticks = 1;
                      }
              }
      
              dosigsuspend(p, p->p_sigmask &~ mask);
              for (;;) {
                      si.si_signo = CURSIG(p);
                      if (si.si_signo != 0) {
                              sigset_t smask = sigmask(si.si_signo);
                              if (smask & mask) {
                                      if (p->p_siglist & smask)
                                              m = &p->p_siglist;
                                      else if (pr->ps_siglist & smask)
                                              m = &pr->ps_siglist;
                                      else {
                                              /* signal got eaten by someone else? */
                                              continue;
                                      }
                                      atomic_clearbits_int(m, smask);
                                      error = 0;
                                      break;
                              }
                      }
      
                      /* per-POSIX, delay this error until after the above */
                      if (timeinvalid)
                              error = EINVAL;
      
                      if (SCARG(uap, timeout) != NULL && to_ticks == 0)
                              error = EAGAIN;
      
                      if (error != 0)
                              break;
      
                      error = tsleep(&sigwaitsleep, PPAUSE|PCATCH, "sigwait",
                          (int)to_ticks);
              }
      
              if (error == 0) {
                      *retval = si.si_signo;
                      if (SCARG(uap, info) != NULL)
                              error = copyout(&si, SCARG(uap, info), sizeof(si));
              } else if (error == ERESTART && SCARG(uap, timeout) != NULL) {
                      /*
                       * Restarting is wrong if there's a timeout, as it'll be
                       * for the same interval again
                       */
                      error = EINTR;
              }
      
              return (error);
      }
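
/*
 * Illustrative userland sketch (not part of this file): libc's
 * sigwait(3) is built on the syscall above.  Synchronously waiting
 * for SIGUSR1 might look like this; handle_usr1() is a hypothetical
 * handler:
 *
 *	sigset_t set;
 *	int sig;
 *
 *	sigemptyset(&set);
 *	sigaddset(&set, SIGUSR1);
 *	sigprocmask(SIG_BLOCK, &set, NULL);
 *	if (sigwait(&set, &sig) == 0 && sig == SIGUSR1)
 *		handle_usr1();
 *
 * The info and timeout arguments are what let siginfo-returning and
 * timed variants share this kernel path.
 */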
      
      void
      initsiginfo(siginfo_t *si, int sig, u_long trapno, int code, union sigval val)
      {
              memset(si, 0, sizeof(*si));
      
              si->si_signo = sig;
              si->si_code = code;
              if (code == SI_USER) {
                      si->si_value = val;
              } else {
                      switch (sig) {
                      case SIGSEGV:
                      case SIGILL:
                      case SIGBUS:
                      case SIGFPE:
                              si->si_addr = val.sival_ptr;
                              si->si_trapno = trapno;
                              break;
                      case SIGXFSZ:
                              break;
                      }
              }
      }
      
      int
      filt_sigattach(struct knote *kn)
{
	struct process *pr = curproc->p_p;

	if (kn->kn_id >= NSIG)
		return EINVAL;

	kn->kn_ptr.p_process = pr;
              kn->kn_flags |= EV_CLEAR;                /* automatically set */
      
              /* XXX lock the proc here while adding to the list? */
              SLIST_INSERT_HEAD(&pr->ps_klist, kn, kn_selnext);
      
              return (0);
      }
      
      void
      filt_sigdetach(struct knote *kn)
{
	struct process *pr = kn->kn_ptr.p_process;

	SLIST_REMOVE(&pr->ps_klist, kn, knote, kn_selnext);
      }
      
      /*
       * signal knotes are shared with proc knotes, so we apply a mask to
       * the hint in order to differentiate them from process hints.  This
       * could be avoided by using a signal-specific knote list, but probably
       * isn't worth the trouble.
       */
      int
      filt_signal(struct knote *kn, long hint)
{

	if (hint & NOTE_SIGNAL) {
		hint &= ~NOTE_SIGNAL;

		if (kn->kn_id == hint)
                              kn->kn_data++;
              }
              return (kn->kn_data != 0);
      }
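
/*
 * Illustrative userland sketch (not part of this file): the filter
 * above backs EVFILT_SIGNAL, which counts signal deliveries between
 * kevent(2) calls (EV_CLEAR is set automatically in filt_sigattach()):
 *
 *	struct kevent kev;
 *	int kq = kqueue();
 *
 *	signal(SIGUSR1, SIG_IGN);
 *	EV_SET(&kev, SIGUSR1, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);
 *	kevent(kq, NULL, 0, &kev, 1, NULL);
 *
 * After the second kevent() call, kev.data holds the delivery count.
 */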
      
      void
      userret(struct proc *p)
{
              int signum;
      
              /* send SIGPROF or SIGVTALRM if their timers interrupted this thread */
	if (p->p_flag & P_PROFPEND) {
                      atomic_clearbits_int(&p->p_flag, P_PROFPEND);
                      KERNEL_LOCK();
                      psignal(p, SIGPROF);
                      KERNEL_UNLOCK();
              }
	if (p->p_flag & P_ALRMPEND) {
                      atomic_clearbits_int(&p->p_flag, P_ALRMPEND);
                      KERNEL_LOCK();
                      psignal(p, SIGVTALRM);
                      KERNEL_UNLOCK();
              }
      
	if (SIGPENDING(p) != 0) {
                      KERNEL_LOCK();
                      while ((signum = CURSIG(p)) != 0)
                              postsig(p, signum);
                      KERNEL_UNLOCK();
              }
      
              /*
               * If P_SIGSUSPEND is still set here, then we still need to restore
               * the original sigmask before returning to userspace.  Also, this
               * might unmask some pending signals, so we need to check a second
               * time for signals to post.
               */
	if (p->p_flag & P_SIGSUSPEND) {
                      atomic_clearbits_int(&p->p_flag, P_SIGSUSPEND);
                      p->p_sigmask = p->p_oldmask;
      
                      KERNEL_LOCK();
                      while ((signum = CURSIG(p)) != 0)
                              postsig(p, signum);
                      KERNEL_UNLOCK();
              }
      
	if (p->p_flag & P_SUSPSINGLE) {
		KERNEL_LOCK();
                      single_thread_check(p, 0);
                      KERNEL_UNLOCK();
              }
      
              WITNESS_WARN(WARN_PANIC, NULL, "userret: returning");
      
              p->p_cpu->ci_schedstate.spc_curpriority = p->p_priority = p->p_usrpri;
      }
      
      int
      single_thread_check(struct proc *p, int deep)
{
              struct process *pr = p->p_p;
      
	if (pr->ps_single != NULL && pr->ps_single != p) {
                      do {
                              int s;
      
                              /* if we're in deep, we need to unwind to the edge */
			if (deep) {
				if (pr->ps_flags & PS_SINGLEUNWIND)
                                              return (ERESTART);
                                      if (pr->ps_flags & PS_SINGLEEXIT)
                                              return (EINTR);
                              }
      
			if (--pr->ps_singlecount == 0)
				wakeup(&pr->ps_singlecount);
			if (pr->ps_flags & PS_SINGLEEXIT)
                                      exit1(p, 0, EXIT_THREAD_NOCHECK);
      
                              /* not exiting and don't need to unwind, so suspend */
                              SCHED_LOCK(s);
                              p->p_stat = SSTOP;
                              mi_switch();
                              SCHED_UNLOCK(s);
		} while (pr->ps_single != NULL);
              }
      
              return (0);
      }
      
      /*
       * Stop other threads in the process.  The mode controls how and
       * where the other threads should stop:
       *  - SINGLE_SUSPEND: stop wherever they are, will later either be told to exit
       *    (by setting to SINGLE_EXIT) or be released (via single_thread_clear())
       *  - SINGLE_PTRACE: stop wherever they are, will wait for them to stop
       *    later (via single_thread_wait()) and released as with SINGLE_SUSPEND
       *  - SINGLE_UNWIND: just unwind to kernel boundary, will be told to exit
       *    or released as with SINGLE_SUSPEND
       *  - SINGLE_EXIT: unwind to kernel boundary and exit
       */
      int
      single_thread_set(struct proc *p, enum single_thread_mode mode, int deep)
{
              struct process *pr = p->p_p;
              struct proc *q;
              int error;
      
              KERNEL_ASSERT_LOCKED();
      
              if ((error = single_thread_check(p, deep)))
                      return error;
      
              switch (mode) {
              case SINGLE_SUSPEND:
              case SINGLE_PTRACE:
                      break;
              case SINGLE_UNWIND:
		atomic_setbits_int(&pr->ps_flags, PS_SINGLEUNWIND);
                      break;
              case SINGLE_EXIT:
                      atomic_setbits_int(&pr->ps_flags, PS_SINGLEEXIT);
                      atomic_clearbits_int(&pr->ps_flags, PS_SINGLEUNWIND);
                      break;
      #ifdef DIAGNOSTIC
              default:
                      panic("single_thread_mode = %d", mode);
      #endif
              }
              pr->ps_single = p;
              pr->ps_singlecount = 0;
	TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link) {
                      int s;
      
		if (q == p)
                              continue;
                      if (q->p_flag & P_WEXIT) {
                              if (mode == SINGLE_EXIT) {
                                      SCHED_LOCK(s);
                                      if (q->p_stat == SSTOP) {
                                              setrunnable(q);
                                              pr->ps_singlecount++;
                                      }
                                      SCHED_UNLOCK(s);
                              }
                              continue;
                      }
                      SCHED_LOCK(s);
                      atomic_setbits_int(&q->p_flag, P_SUSPSINGLE);
		switch (q->p_stat) {
                      case SIDL:
                      case SRUN:
                              pr->ps_singlecount++;
                              break;
                      case SSLEEP:
			/* if it's not interruptible, we just have to wait */
			if (q->p_flag & P_SINTR) {
                                      /* merely need to suspend?  just stop it */
                                      if (mode == SINGLE_SUSPEND ||
                                          mode == SINGLE_PTRACE) {
                                              q->p_stat = SSTOP;
                                              break;
                                      }
                                      /* need to unwind or exit, so wake it */
				setrunnable(q);
                              }
                              pr->ps_singlecount++;
                              break;
                      case SSTOP:
                              if (mode == SINGLE_EXIT) {
                                      setrunnable(q);
                                      pr->ps_singlecount++;
                              }
                              break;
                      case SDEAD:
                              break;
                      case SONPROC:
			pr->ps_singlecount++;
                              signotify(q);
                              break;
                      }
                      SCHED_UNLOCK(s);
              }
      
              if (mode != SINGLE_PTRACE)
		single_thread_wait(pr);
      
              return 0;
      }
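
/*
 * Sketch of the usual calling pattern (compare sigexit() above): park
 * the siblings, do the work, then either release them with
 * single_thread_clear() or escalate to SINGLE_EXIT:
 *
 *	if (single_thread_set(p, SINGLE_SUSPEND, 0) == 0) {
 *		... other threads are now parked ...
 *		single_thread_clear(p, 0);
 *	}
 */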
      
      void
      single_thread_wait(struct process *pr)
      {
              /* wait until they're all suspended */
	while (pr->ps_singlecount > 0)
                      tsleep(&pr->ps_singlecount, PWAIT, "suspend", 0);
      }
      
      void
      single_thread_clear(struct proc *p, int flag)
{
              struct process *pr = p->p_p;
              struct proc *q;
      
              KASSERT(pr->ps_single == p);
              KERNEL_ASSERT_LOCKED();
      
              pr->ps_single = NULL;
              atomic_clearbits_int(&pr->ps_flags, PS_SINGLEUNWIND | PS_SINGLEEXIT);
	TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link) {
                      int s;
      
		if (q == p || (q->p_flag & P_SUSPSINGLE) == 0)
                              continue;
                      atomic_clearbits_int(&q->p_flag, P_SUSPSINGLE);
      
                      /*
		 * If the thread was only stopped for single threading,
		 * then clearing that either makes it runnable or puts
		 * it back into some sleep queue.
                       */
                      SCHED_LOCK(s);
                      if (q->p_stat == SSTOP && (q->p_flag & flag) == 0) {
                              if (q->p_wchan == 0)
				setrunnable(q);
                              else
                                      q->p_stat = SSLEEP;
                      }
                      SCHED_UNLOCK(s);
              }
      }
      
      void
      sigio_del(struct sigiolst *rmlist)
      {
              struct sigio *sigio;
      
	while ((sigio = LIST_FIRST(rmlist)) != NULL) {
		LIST_REMOVE(sigio, sio_pgsigio);
                      crfree(sigio->sio_ucred);
                      free(sigio, M_SIGIO, sizeof(*sigio));
              }
      }
      
      void
      sigio_unlink(struct sigio_ref *sir, struct sigiolst *rmlist)
{
              struct sigio *sigio;
      
              MUTEX_ASSERT_LOCKED(&sigio_lock);
      
              sigio = sir->sir_sigio;
	if (sigio != NULL) {
                      KASSERT(sigio->sio_myref == sir);
                      sir->sir_sigio = NULL;
      
                      if (sigio->sio_pgid > 0)
			sigio->sio_proc = NULL;
                      else
                              sigio->sio_pgrp = NULL;
		LIST_REMOVE(sigio, sio_pgsigio);

		LIST_INSERT_HEAD(rmlist, sigio, sio_pgsigio);
              }
      }
      
      void
      sigio_free(struct sigio_ref *sir)
{
              struct sigiolst rmlist;
      
	if (sir->sir_sigio == NULL)
                      return;
      
              LIST_INIT(&rmlist);
      
              mtx_enter(&sigio_lock);
              sigio_unlink(sir, &rmlist);
              mtx_leave(&sigio_lock);
      
	sigio_del(&rmlist);
      }
      
      void
      sigio_freelist(struct sigiolst *sigiolst)
{
	struct sigiolst rmlist;
	struct sigio *sigio;

	if (LIST_EMPTY(sigiolst))
                      return;
      
              LIST_INIT(&rmlist);
      
              mtx_enter(&sigio_lock);
              while ((sigio = LIST_FIRST(sigiolst)) != NULL)
                      sigio_unlink(sigio->sio_myref, &rmlist);
              mtx_leave(&sigio_lock);
      
              sigio_del(&rmlist);
      }
      
      int
      sigio_setown(struct sigio_ref *sir, pid_t pgid)
{
              struct sigiolst rmlist;
              struct proc *p = curproc;
              struct pgrp *pgrp = NULL;
              struct process *pr = NULL;
              struct sigio *sigio;
              int error;
      
              if (pgid == 0) {
		sigio_free(sir);
                      return (0);
              }
      
              sigio = malloc(sizeof(*sigio), M_SIGIO, M_WAITOK);
              sigio->sio_pgid = pgid;
              sigio->sio_ucred = crhold(p->p_ucred);
              sigio->sio_myref = sir;
      
              LIST_INIT(&rmlist);
      
              /*
	 * Holding the kernel lock and not sleeping between prfind()/pgfind()
	 * and the linking of the sigio ensure that the process or process
	 * group does not disappear unexpectedly.
               */
              KERNEL_LOCK();
              mtx_enter(&sigio_lock);
      
              if (pgid > 0) {
                      pr = prfind(pgid);
		if (pr == NULL) {
                              error = ESRCH;
                              goto fail;
                      }
      
                      /*
                       * Policy - Don't allow a process to FSETOWN a process
                       * in another session.
                       *
                       * Remove this test to allow maximum flexibility or
                       * restrict FSETOWN to the current process or process
                       * group for maximum safety.
                       */
		if (pr->ps_session != p->p_p->ps_session) {
                              error = EPERM;
                              goto fail;
                      }
      
                      if ((pr->ps_flags & PS_EXITING) != 0) {
                              error = ESRCH;
                              goto fail;
                      }
              } else /* if (pgid < 0) */ {
                      pgrp = pgfind(-pgid);
		if (pgrp == NULL) {
                              error = ESRCH;
                              goto fail;
                      }
      
                      /*
                       * Policy - Don't allow a process to FSETOWN a process
                       * in another session.
                       *
                       * Remove this test to allow maximum flexibility or
                       * restrict FSETOWN to the current process or process
                       * group for maximum safety.
                       */
		if (pgrp->pg_session != p->p_p->ps_session) {
                              error = EPERM;
                              goto fail;
                      }
              }
      
              if (pgid > 0) {
                      sigio->sio_proc = pr;
		LIST_INSERT_HEAD(&pr->ps_sigiolst, sigio, sio_pgsigio);
              } else {
                      sigio->sio_pgrp = pgrp;
                      LIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
              }
      
              sigio_unlink(sir, &rmlist);
              sir->sir_sigio = sigio;
      
              mtx_leave(&sigio_lock);
              KERNEL_UNLOCK();
      
	sigio_del(&rmlist);
      
              return (0);
      
      fail:
              mtx_leave(&sigio_lock);
              KERNEL_UNLOCK();
      
              crfree(sigio->sio_ucred);
              free(sigio, M_SIGIO, sizeof(*sigio));
      
              return (error);
      }
      
      pid_t
      sigio_getown(struct sigio_ref *sir)
{
	struct sigio *sigio;
	pid_t pgid = 0;

	mtx_enter(&sigio_lock);
	sigio = sir->sir_sigio;
	if (sigio != NULL)
		pgid = sigio->sio_pgid;
              mtx_leave(&sigio_lock);
      
              return (pgid);
      }
      
      void
      sigio_copy(struct sigio_ref *dst, struct sigio_ref *src)
      {
              struct sigiolst rmlist;
              struct sigio *newsigio, *sigio;
      
              sigio_free(dst);
      
              if (src->sir_sigio == NULL)
                      return;
      
              newsigio = malloc(sizeof(*newsigio), M_SIGIO, M_WAITOK);
              LIST_INIT(&rmlist);
      
              mtx_enter(&sigio_lock);
      
              sigio = src->sir_sigio;
              if (sigio == NULL) {
                      mtx_leave(&sigio_lock);
                      free(newsigio, M_SIGIO, sizeof(*newsigio));
                      return;
              }
      
              newsigio->sio_pgid = sigio->sio_pgid;
              newsigio->sio_ucred = crhold(sigio->sio_ucred);
              newsigio->sio_myref = dst;
              if (newsigio->sio_pgid > 0) {
                      newsigio->sio_proc = sigio->sio_proc;
                      LIST_INSERT_HEAD(&newsigio->sio_proc->ps_sigiolst, newsigio,
                          sio_pgsigio);
              } else {
                      newsigio->sio_pgrp = sigio->sio_pgrp;
                      LIST_INSERT_HEAD(&newsigio->sio_pgrp->pg_sigiolst, newsigio,
                          sio_pgsigio);
              }
      
              sigio_unlink(dst, &rmlist);
              dst->sir_sigio = newsigio;
      
              mtx_leave(&sigio_lock);
      
              sigio_del(&rmlist);
      }
      /*        $OpenBSD: time.h,v 1.46 2019/08/03 22:53:45 cheloha Exp $        */
      /*        $NetBSD: time.h,v 1.18 1996/04/23 10:29:33 mycroft Exp $        */
      
      /*
       * Copyright (c) 1982, 1986, 1993
       *        The Regents of the University of California.  All rights reserved.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       * 3. Neither the name of the University nor the names of its contributors
       *    may be used to endorse or promote products derived from this software
       *    without specific prior written permission.
       *
       * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
       * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
       * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
       * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
       * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
       * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
       * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
       * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
       * SUCH DAMAGE.
       *
       *        @(#)time.h        8.2 (Berkeley) 7/10/94
       */
      
      #ifndef _SYS_TIME_H_
      #define _SYS_TIME_H_
      
      #include <sys/select.h>
      
      #ifndef _TIMEVAL_DECLARED
      #define _TIMEVAL_DECLARED
      /*
       * Structure returned by gettimeofday(2) system call,
       * and used in other calls.
       */
      struct timeval {
              time_t                tv_sec;                /* seconds */
              suseconds_t        tv_usec;        /* and microseconds */
      };
      #endif
      
      #ifndef _TIMESPEC_DECLARED
      #define _TIMESPEC_DECLARED
      /*
       * Structure defined by POSIX.1b to be like a timeval.
       */
      struct timespec {
              time_t        tv_sec;                /* seconds */
              long        tv_nsec;        /* and nanoseconds */
      };
      #endif
      
      #define        TIMEVAL_TO_TIMESPEC(tv, ts) do {                                \
              (ts)->tv_sec = (tv)->tv_sec;                                        \
              (ts)->tv_nsec = (tv)->tv_usec * 1000;                                \
      } while (0)
      #define        TIMESPEC_TO_TIMEVAL(tv, ts) do {                                \
              (tv)->tv_sec = (ts)->tv_sec;                                        \
              (tv)->tv_usec = (ts)->tv_nsec / 1000;                                \
      } while (0)
      
      struct timezone {
              int        tz_minuteswest;        /* minutes west of Greenwich */
              int        tz_dsttime;        /* type of dst correction */
      };
      #define        DST_NONE        0        /* not on dst */
      #define        DST_USA                1        /* USA style dst */
      #define        DST_AUST        2        /* Australian style dst */
      #define        DST_WET                3        /* Western European dst */
      #define        DST_MET                4        /* Middle European dst */
      #define        DST_EET                5        /* Eastern European dst */
      #define        DST_CAN                6        /* Canada */
      
      /* Operations on timevals. */
      #define        timerclear(tvp)                (tvp)->tv_sec = (tvp)->tv_usec = 0
      #define        timerisset(tvp)                ((tvp)->tv_sec || (tvp)->tv_usec)
      #define        timerisvalid(tvp)                                                \
              ((tvp)->tv_usec >= 0 && (tvp)->tv_usec < 1000000)
      #define        timercmp(tvp, uvp, cmp)                                                \
              (((tvp)->tv_sec == (uvp)->tv_sec) ?                                \
                  ((tvp)->tv_usec cmp (uvp)->tv_usec) :                        \
                  ((tvp)->tv_sec cmp (uvp)->tv_sec))
      #define        timeradd(tvp, uvp, vvp)                                                \
              do {                                                                \
                      (vvp)->tv_sec = (tvp)->tv_sec + (uvp)->tv_sec;                \
                      (vvp)->tv_usec = (tvp)->tv_usec + (uvp)->tv_usec;        \
                      if ((vvp)->tv_usec >= 1000000) {                        \
                              (vvp)->tv_sec++;                                \
                              (vvp)->tv_usec -= 1000000;                        \
                      }                                                        \
              } while (0)
      #define        timersub(tvp, uvp, vvp)                                                \
              do {                                                                \
                      (vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec;                \
                      (vvp)->tv_usec = (tvp)->tv_usec - (uvp)->tv_usec;        \
                      if ((vvp)->tv_usec < 0) {                                \
                              (vvp)->tv_sec--;                                \
                              (vvp)->tv_usec += 1000000;                        \
                      }                                                        \
              } while (0)
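
/*
 * Example (illustrative) of measuring an elapsed interval with the
 * operations above; "limit" stands in for whatever threshold the
 * caller cares about:
 *
 *	struct timeval start, end, diff;
 *
 *	gettimeofday(&start, NULL);
 *	... do the work being measured ...
 *	gettimeofday(&end, NULL);
 *	timersub(&end, &start, &diff);
 *	if (timercmp(&diff, &limit, >))
 *		... the work took longer than "limit" ...
 */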
      
      /* Operations on timespecs. */
      #define        timespecclear(tsp)                (tsp)->tv_sec = (tsp)->tv_nsec = 0
      #define        timespecisset(tsp)                ((tsp)->tv_sec || (tsp)->tv_nsec)
      #define        timespecisvalid(tsp)                                                \
              ((tsp)->tv_nsec >= 0 && (tsp)->tv_nsec < 1000000000L)
      #define        timespeccmp(tsp, usp, cmp)                                        \
              (((tsp)->tv_sec == (usp)->tv_sec) ?                                \
                  ((tsp)->tv_nsec cmp (usp)->tv_nsec) :                        \
                  ((tsp)->tv_sec cmp (usp)->tv_sec))
      #define        timespecadd(tsp, usp, vsp)                                        \
              do {                                                                \
                      (vsp)->tv_sec = (tsp)->tv_sec + (usp)->tv_sec;                \
                      (vsp)->tv_nsec = (tsp)->tv_nsec + (usp)->tv_nsec;        \
                      if ((vsp)->tv_nsec >= 1000000000L) {                        \
                              (vsp)->tv_sec++;                                \
                              (vsp)->tv_nsec -= 1000000000L;                        \
                      }                                                        \
              } while (0)
      #define        timespecsub(tsp, usp, vsp)                                        \
              do {                                                                \
                      (vsp)->tv_sec = (tsp)->tv_sec - (usp)->tv_sec;                \
                      (vsp)->tv_nsec = (tsp)->tv_nsec - (usp)->tv_nsec;        \
                      if ((vsp)->tv_nsec < 0) {                                \
                              (vsp)->tv_sec--;                                \
                              (vsp)->tv_nsec += 1000000000L;                        \
                      }                                                        \
              } while (0)
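
/*
 * Example (illustrative) of turning a relative timeout into an
 * absolute deadline with the operations above, validating it first:
 *
 *	struct timespec now, deadline;
 *	struct timespec timeout = { 5, 0 };
 *
 *	if (!timespecisvalid(&timeout))
 *		return (EINVAL);
 *	nanouptime(&now);
 *	timespecadd(&now, &timeout, &deadline);
 *	...
 *	nanouptime(&now);
 *	if (timespeccmp(&now, &deadline, >=))
 *		... the timeout has expired ...
 */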
      
      /*
       * Names of the interval timers, and structure
       * defining a timer setting.
       */
      #define        ITIMER_REAL        0
      #define        ITIMER_VIRTUAL        1
      #define        ITIMER_PROF        2
      
      struct        itimerval {
              struct        timeval it_interval;        /* timer interval */
              struct        timeval it_value;        /* current value */
      };
      
      #if __BSD_VISIBLE
      /*
       * clock information structure for sysctl({CTL_KERN, KERN_CLOCKRATE})
       */
      struct clockinfo {
              int        hz;                /* clock frequency */
              int        tick;                /* micro-seconds per hz tick */
              int        tickadj;        /* clock skew rate for adjtime() */
              int        stathz;                /* statistics clock frequency */
              int        profhz;                /* profiling clock frequency */
      };
      #endif /* __BSD_VISIBLE */
      
      #if defined(_KERNEL) || defined(_STANDALONE)
      #include <sys/_time.h>
      
      /* Time expressed as seconds and fractions of a second + operations on it. */
      struct bintime {
              time_t        sec;
              uint64_t frac;
      };
      
      #define bintimecmp(btp, ctp, cmp)                                        \
              ((btp)->sec == (ctp)->sec ?                                        \
                  (btp)->frac cmp (ctp)->frac :                                \
                  (btp)->sec cmp (ctp)->sec)
      
      static __inline void
      bintimeaddfrac(const struct bintime *bt, uint64_t x, struct bintime *ct)
      {
              ct->sec = bt->sec;
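	/*
	 * Unsigned overflow is the carry detector here: the sum wrapped
	 * iff it came out smaller than the fraction we started from.
	 * bintimeadd() and bintimesub() below use the same trick for
	 * carry and borrow.
	 */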
	if (bt->frac > bt->frac + x)
                      ct->sec++;
              ct->frac = bt->frac + x;
      }
      
      static __inline void
      bintimeadd(const struct bintime *bt, const struct bintime *ct,
          struct bintime *dt)
      {
              dt->sec = bt->sec + ct->sec;
              if (bt->frac > bt->frac + ct->frac)
                      dt->sec++;
              dt->frac = bt->frac + ct->frac;
      }
      
      static __inline void
      bintimesub(const struct bintime *bt, const struct bintime *ct,
          struct bintime *dt)
      {
              dt->sec = bt->sec - ct->sec;
	if (bt->frac < bt->frac - ct->frac)
                      dt->sec--;
              dt->frac = bt->frac - ct->frac;
      }
      
      /*-
       * Background information:
       *
       * When converting between timestamps on parallel timescales of differing
       * resolutions it is historical and scientific practice to round down rather
       * than doing 4/5 rounding.
       *
       *   The date changes at midnight, not at noon.
       *
       *   Even at 15:59:59.999999999 it's not four'o'clock.
       *
       *   time_second ticks after N.999999999 not after N.4999999999
       */
      
      static __inline void
      BINTIME_TO_TIMESPEC(const struct bintime *bt, struct timespec *ts)
      {
              ts->tv_sec = bt->sec;
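	/*
	 * nanoseconds = floor(10^9 * frac / 2^64), computed from the
	 * top 32 fraction bits so the product fits in 64 bits; the
	 * truncation rounds down, matching the note above.
	 */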
              ts->tv_nsec = (long)(((uint64_t)1000000000 * (uint32_t)(bt->frac >> 32)) >> 32);
      }
      
      static __inline void
      TIMESPEC_TO_BINTIME(const struct timespec *ts, struct bintime *bt)
      {
              bt->sec = ts->tv_sec;
              /* 18446744073 = int(2^64 / 1000000000) */
              bt->frac = (uint64_t)ts->tv_nsec * (uint64_t)18446744073ULL; 
      }
      
      static __inline void
      BINTIME_TO_TIMEVAL(const struct bintime *bt, struct timeval *tv)
      {
              tv->tv_sec = bt->sec;
              tv->tv_usec = (long)(((uint64_t)1000000 * (uint32_t)(bt->frac >> 32)) >> 32);
      }
      
      static __inline void
      TIMEVAL_TO_BINTIME(const struct timeval *tv, struct bintime *bt)
      {
              bt->sec = (time_t)tv->tv_sec;
              /* 18446744073709 = int(2^64 / 1000000) */
              bt->frac = (uint64_t)tv->tv_usec * (uint64_t)18446744073709ULL;
      }
      
      extern volatile time_t time_second;        /* Seconds since epoch, wall time. */
      extern volatile time_t time_uptime;        /* Seconds since reboot. */
      
      /*
       * Functions for looking at our clocks: [get]{bin,nano,micro}[boot|up]time()
       *
 * Functions without the "get" prefix return the best timestamp
 * we can produce in the given format.
 *
 * "bin"   == struct bintime  == seconds + 64 bit fraction of seconds.
 * "nano"  == struct timespec == seconds + nanoseconds.
 * "micro" == struct timeval  == seconds + microseconds.
 *
 * Functions containing "up" return time relative to boot and
 * should be used for calculating time intervals.
 *
 * Functions containing "boot" return the GMT time at which the
 * system booted.
 *
 * Functions with just "time" return the current GMT time.
 *
 * Functions with the "get" prefix return a less precise result
 * much faster than the functions without the "get" prefix and should
 * be used where a precision of 10 msec is acceptable or where
 * performance is a priority.  (NB: "precision", _not_ "resolution"!)
       */
      
      void        bintime(struct bintime *);
      void        nanotime(struct timespec *);
      void        microtime(struct timeval *);
      
      void        getnanotime(struct timespec *);
      void        getmicrotime(struct timeval *);
      
      void        binuptime(struct bintime *);
      void        nanouptime(struct timespec *);
      void        microuptime(struct timeval *);
      
      void        getnanouptime(struct timespec *);
      void        getmicrouptime(struct timeval *);
      
      void        binboottime(struct bintime *);
      void        microboottime(struct timeval *);
      
      struct proc;
      int        clock_gettime(struct proc *, clockid_t, struct timespec *);
      
      int        itimerfix(struct timeval *);
      int        itimerdecr(struct itimerspec *, long);
      int        settime(const struct timespec *);
      int        ratecheck(struct timeval *, const struct timeval *);
      int        ppsratecheck(struct timeval *, int *, int);
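
/*
 * Example (illustrative): ratecheck() returns nonzero when at least
 * *mininterval has elapsed since *lasttime, so it can throttle a
 * noisy message:
 *
 *	static struct timeval lasttime;
 *	static const struct timeval interval = { 1, 0 };
 *
 *	if (ratecheck(&lasttime, &interval))
 *		log(LOG_ERR, "something worth saying, at most once/sec\n");
 */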
      
      /*
       * "POSIX time" to/from "YY/MM/DD/hh/mm/ss"
       */
      struct clock_ymdhms {
              u_short dt_year;
              u_char dt_mon;
              u_char dt_day;
              u_char dt_wday; /* Day of week */
              u_char dt_hour;
              u_char dt_min;
              u_char dt_sec;
      };
      
      time_t clock_ymdhms_to_secs(struct clock_ymdhms *);
      void clock_secs_to_ymdhms(time_t, struct clock_ymdhms *);
      /*
       * BCD to decimal and decimal to BCD.
       */
      #define FROMBCD(x)      (((x) >> 4) * 10 + ((x) & 0xf))
      #define TOBCD(x)        (((x) / 10 * 16) + ((x) % 10))
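/* For example, FROMBCD(0x59) == 59 and TOBCD(59) == 0x59. */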
      
      /* Some handy constants. */
      #define SECDAY          86400L
      #define SECYR           (SECDAY * 365)
      
      /* Traditional POSIX base year */
      #define POSIX_BASE_YEAR 1970
      
      static __inline void
      NSEC_TO_TIMEVAL(uint64_t ns, struct timeval *tv)
      {
              tv->tv_sec = ns / 1000000000L;
              tv->tv_usec = (ns % 1000000000L) / 1000;
      }
      
      static __inline void
      NSEC_TO_TIMESPEC(uint64_t ns, struct timespec *ts)
      {
              ts->tv_sec = ns / 1000000000L;
              ts->tv_nsec = ns % 1000000000L;
      }
      
      #include <sys/stdint.h>
      
      static __inline uint64_t
      SEC_TO_NSEC(uint64_t seconds)
      {
              if (seconds > UINT64_MAX / 1000000000ULL)
                      return UINT64_MAX;
              return seconds * 1000000000ULL;
      }
      
      static __inline uint64_t
      MSEC_TO_NSEC(uint64_t milliseconds)
      {
              if (milliseconds > UINT64_MAX / 1000000ULL)
                      return UINT64_MAX;
              return milliseconds * 1000000ULL;
      }
      
      static __inline uint64_t
      USEC_TO_NSEC(uint64_t microseconds)
      {
              if (microseconds > UINT64_MAX / 1000ULL)
                      return UINT64_MAX;
              return microseconds * 1000ULL;
      }
      
      #else /* !_KERNEL */
      #include <time.h>
      
      #if __BSD_VISIBLE || __XPG_VISIBLE
      __BEGIN_DECLS
      #if __BSD_VISIBLE
      int        adjtime(const struct timeval *, struct timeval *);
      int        adjfreq(const int64_t *, int64_t *);
      #endif
      #if __XPG_VISIBLE
      int        futimes(int, const struct timeval *);
      int        getitimer(int, struct itimerval *);
      int        gettimeofday(struct timeval *, struct timezone *);
      int        setitimer(int, const struct itimerval *, struct itimerval *);
      int        settimeofday(const struct timeval *, const struct timezone *);
      int        utimes(const char *, const struct timeval *);
      #endif /* __XPG_VISIBLE */
      __END_DECLS
      #endif /* __BSD_VISIBLE || __XPG_VISIBLE */
      
      #endif /* !_KERNEL */
      
      #endif /* !_SYS_TIME_H_ */
      /*        $OpenBSD: ffs_vfsops.c,v 1.181 2019/07/25 01:43:21 cheloha Exp $        */
      /*        $NetBSD: ffs_vfsops.c,v 1.19 1996/02/09 22:22:26 christos Exp $        */
      
      /*
       * Copyright (c) 1989, 1991, 1993, 1994
       *        The Regents of the University of California.  All rights reserved.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       * 3. Neither the name of the University nor the names of its contributors
       *    may be used to endorse or promote products derived from this software
       *    without specific prior written permission.
       *
       * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
       * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
       * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
       * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
       * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
       * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
       * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
       * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
       * SUCH DAMAGE.
       *
       *        @(#)ffs_vfsops.c        8.14 (Berkeley) 11/28/94
       */
      
      #include <sys/param.h>
      #include <sys/systm.h>
      #include <sys/namei.h>
      #include <sys/proc.h>
      #include <sys/kernel.h>
      #include <sys/vnode.h>
      #include <sys/socket.h>
      #include <sys/mount.h>
      #include <sys/buf.h>
      #include <sys/mbuf.h>
      #include <sys/fcntl.h>
      #include <sys/ioctl.h>
      #include <sys/errno.h>
      #include <sys/malloc.h>
      #include <sys/sysctl.h>
      #include <sys/pool.h>
      #include <sys/dkio.h>
      #include <sys/disk.h>
      #include <sys/specdev.h>
      
      #include <ufs/ufs/quota.h>
      #include <ufs/ufs/ufsmount.h>
      #include <ufs/ufs/inode.h>
      #include <ufs/ufs/dir.h>
      #include <ufs/ufs/ufs_extern.h>
      #include <ufs/ufs/dirhash.h>
      
      #include <ufs/ffs/fs.h>
      #include <ufs/ffs/ffs_extern.h>
      
      #include <uvm/uvm_extern.h>
      
      int ffs_sbupdate(struct ufsmount *, int);
      int ffs_reload_vnode(struct vnode *, void *);
      int ffs_sync_vnode(struct vnode *, void *);
      int ffs_validate(struct fs *);
      
      void ffs1_compat_read(struct fs *, struct ufsmount *, daddr_t);
      void ffs1_compat_write(struct fs *, struct ufsmount *);
      
      const struct vfsops ffs_vfsops = {
              ffs_mount,
              ufs_start,
              ffs_unmount,
              ufs_root,
              ufs_quotactl,
              ffs_statfs,
              ffs_sync,
              ffs_vget,
              ffs_fhtovp,
              ffs_vptofh,
              ffs_init,
              ffs_sysctl,
              ufs_check_export
      };
      
      struct inode_vtbl ffs_vtbl = {
              ffs_truncate,
              ffs_update,
              ffs_inode_alloc,
              ffs_inode_free,
              ffs_balloc,
              ffs_bufatoff
      };
      
      int
      ffs_checkrange(struct mount *mp, uint32_t ino)
      {
              struct buf *bp;
              struct cg *cgp;
              struct fs *fs;
              struct ufsmount *ump;
              int cg, error;
      
              fs = VFSTOUFS(mp)->um_fs;
              if (ino < ROOTINO || ino >= fs->fs_ncg * fs->fs_ipg)
                      return ESTALE;
      
              /*
	 * Need to check if the inode is initialized because FFS2 does
	 * lazy initialization and we can get here from nfs_fhtovp.
               */
              if (fs->fs_magic != FS_UFS2_MAGIC)
                      return 0;
      
              cg = ino_to_cg(fs, ino);
              ump = VFSTOUFS(mp);
      
              error = bread(ump->um_devvp, fsbtodb(fs, cgtod(fs, cg)),
                  (int)fs->fs_cgsize, &bp);
              if (error)
                      return error;
      
              cgp = (struct cg *)bp->b_data;
              if (!cg_chkmagic(cgp)) {
                      brelse(bp);
                      return ESTALE;
              }
      
              brelse(bp);
      
              if (cg * fs->fs_ipg + cgp->cg_initediblk < ino)
                      return ESTALE;
      
              return 0;
      }
      
      /*
       * Called by main() when ufs is going to be mounted as root.
       */
      
      struct pool ffs_ino_pool;
      struct pool ffs_dinode1_pool;
      #ifdef FFS2
      struct pool ffs_dinode2_pool;
      #endif
      
      int
      ffs_mountroot(void)
      {
              struct fs *fs;
              struct mount *mp;
              struct proc *p = curproc;        /* XXX */
              struct ufsmount *ump;
              int error;
      
              /*
               * Get vnodes for swapdev and rootdev.
               */
              swapdev_vp = NULL;
              if ((error = bdevvp(swapdev, &swapdev_vp)) ||
                  (error = bdevvp(rootdev, &rootvp))) {
                      printf("ffs_mountroot: can't setup bdevvp's\n");
                      if (swapdev_vp)
                              vrele(swapdev_vp);
                      return (error);
              }
      
              if ((error = vfs_rootmountalloc("ffs", "root_device", &mp)) != 0) {
                      vrele(swapdev_vp);
                      vrele(rootvp);
                      return (error);
              }
      
              if ((error = ffs_mountfs(rootvp, mp, p)) != 0) {
                      vfs_unbusy(mp);
                      vfs_mount_free(mp);
                      vrele(swapdev_vp);
                      vrele(rootvp);
                      return (error);
              }
      
              TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
              ump = VFSTOUFS(mp);
              fs = ump->um_fs;
              strlcpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, sizeof(fs->fs_fsmnt));
              (void)ffs_statfs(mp, &mp->mnt_stat, p);
              vfs_unbusy(mp);
              inittodr(fs->fs_time);
      
              return (0);
      }
      
      /*
       * VFS Operations.
       *
       * mount system call
       */
      int
      ffs_mount(struct mount *mp, const char *path, void *data,
          struct nameidata *ndp, struct proc *p)
      {
              struct vnode *devvp;
              struct ufs_args *args = data;
              struct ufsmount *ump = NULL;
              struct fs *fs;
              char fname[MNAMELEN];
              char fspec[MNAMELEN];
              int error = 0, flags;
              int ronly;
      
      #ifndef FFS_SOFTUPDATES
              if (mp->mnt_flag & MNT_SOFTDEP) {
                      printf("WARNING: soft updates isn't compiled in\n");
                      mp->mnt_flag &= ~MNT_SOFTDEP;
              }
      #endif
      
        /*
         * Soft updates is incompatible with "async", so if we are
         * doing soft updates, stop the user from setting the async
         * flag.
         */
              if ((mp->mnt_flag & (MNT_SOFTDEP | MNT_ASYNC)) ==
                  (MNT_SOFTDEP | MNT_ASYNC)) {
                      return (EINVAL);
              }
              /*
               * If updating, check whether changing from read-only to
               * read/write; if there is no device name, that's all we do.
               */
              if (mp->mnt_flag & MNT_UPDATE) {
                      ump = VFSTOUFS(mp);
                      fs = ump->um_fs;
                      devvp = ump->um_devvp;
                      error = 0;
                      ronly = fs->fs_ronly;
      
                /*
                 * If the filesystem is mounted read/write with soft
                 * updates active, setting "async" is illegal; refuse it.
                 */
                      if (ronly == 0 && (mp->mnt_flag & MNT_ASYNC) &&
                          (fs->fs_flags & FS_DOSOFTDEP)) {
                              error = EINVAL;
                              goto error_1;
                      }
      
                      if (ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
                              /* Flush any dirty data */
                              VFS_SYNC(mp, MNT_WAIT, 0, p->p_ucred, p);
      
                              /*
                               * Get rid of files open for writing.
                               */
                              flags = WRITECLOSE;
                              if (args == NULL)
                                      flags |= IGNORECLEAN;
                              if (mp->mnt_flag & MNT_FORCE)
                                      flags |= FORCECLOSE;
                              if (fs->fs_flags & FS_DOSOFTDEP) {
                                      error = softdep_flushfiles(mp, flags, p);
                                      mp->mnt_flag &= ~MNT_SOFTDEP;
                              } else
                                      error = ffs_flushfiles(mp, flags, p);
                              mp->mnt_flag |= MNT_RDONLY;
                              ronly = 1;
                      }
      
                /*
                 * Flush soft dependencies when disabling them via an
                 * update mount. This may leave some items to be
                 * processed, so it is not done yet. XXX
                 */
                      if ((fs->fs_flags & FS_DOSOFTDEP) &&
                          !(mp->mnt_flag & MNT_SOFTDEP) &&
                          !(mp->mnt_flag & MNT_RDONLY) && fs->fs_ronly == 0) {
      #if 0
                              flags = WRITECLOSE;
                              if (mp->mnt_flag & MNT_FORCE)
                                      flags |= FORCECLOSE;
                              error = softdep_flushfiles(mp, flags, p);
      #elif FFS_SOFTUPDATES
                              mp->mnt_flag |= MNT_SOFTDEP;
      #endif
                      }
                      /*
                       * When upgrading to a softdep mount, we must first flush
                       * all vnodes. (not done yet -- see above)
                       */
                      if (!(fs->fs_flags & FS_DOSOFTDEP) &&
                          (mp->mnt_flag & MNT_SOFTDEP) && fs->fs_ronly == 0) {
      #if 0
                              flags = WRITECLOSE;
                              if (mp->mnt_flag & MNT_FORCE)
                                      flags |= FORCECLOSE;
                              error = ffs_flushfiles(mp, flags, p);
      #else
                              mp->mnt_flag &= ~MNT_SOFTDEP;
      #endif
                      }
      
                      if (!error && (mp->mnt_flag & MNT_RELOAD))
                              error = ffs_reload(mp, ndp->ni_cnd.cn_cred, p);
                      if (error)
                              goto error_1;
      
                      if (ronly && (mp->mnt_flag & MNT_WANTRDWR)) {
                              if (fs->fs_clean == 0) {
      #if 0
                                      /*
                                       * It is safe to mount an unclean file system
                                       * if it was previously mounted with softdep
                                       * but we may lose space and must
                                       * sometimes run fsck manually.
                                       */
                                      if (fs->fs_flags & FS_DOSOFTDEP)
                                              printf(
      "WARNING: %s was not properly unmounted\n",
                                                  fs->fs_fsmnt);
                                      else
      #endif
                                      if (mp->mnt_flag & MNT_FORCE) {
                                              printf(
      "WARNING: %s was not properly unmounted\n",
                                                  fs->fs_fsmnt);
                                      } else {
                                              printf(
      "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
                                                  fs->fs_fsmnt);
                                              error = EROFS;
                                              goto error_1;
                                      }
                              }
      
                              if ((fs->fs_flags & FS_DOSOFTDEP)) {
                                      error = softdep_mount(devvp, mp, fs,
                                                            p->p_ucred);
                                      if (error)
                                              goto error_1;
                              }
                              fs->fs_contigdirs = malloc((u_long)fs->fs_ncg,
                                   M_UFSMNT, M_WAITOK|M_ZERO);
      
                              ronly = 0;
                      }
                      if (args == NULL)
                              goto success;
                      if (args->fspec == NULL) {
                              /*
                               * Process export requests.
                               */
                              error = vfs_export(mp, &ump->um_export, 
                                  &args->export_info);
                              if (error)
                                      goto error_1;
                              else
                                      goto success;
                      }
              }
      
              /*
               * Not an update, or updating the name: look up the name
               * and verify that it refers to a sensible block device.
               */
              error = copyinstr(args->fspec, fspec, sizeof(fspec), NULL);
              if (error)
                      goto error_1;
      
              if (disk_map(fspec, fname, MNAMELEN, DM_OPENBLCK) == -1)
                      memcpy(fname, fspec, sizeof(fname));
      
              NDINIT(ndp, LOOKUP, FOLLOW, UIO_SYSSPACE, fname, p);
              if ((error = namei(ndp)) != 0)
                      goto error_1;
      
              devvp = ndp->ni_vp;
      
              if (devvp->v_type != VBLK) {
                      error = ENOTBLK;
                      goto error_2;
              }
      
              if (major(devvp->v_rdev) >= nblkdev) {
                      error = ENXIO;
                      goto error_2;
              }
      
              if (mp->mnt_flag & MNT_UPDATE) {
                /*
                 * UPDATE
                 * If it's not the same vnode, or at least the same
                 * device, then the update is not correct.
                 */
      
                      if (devvp != ump->um_devvp) {
                              if (devvp->v_rdev == ump->um_devvp->v_rdev) {
                                      vrele(devvp);
                              } else {
                                      error = EINVAL;        /* needs translation */
                              }
                      } else
                              vrele(devvp);
                      /*
                       * Update device name only on success
                       */
                      if (!error) {
                              /*
                               * Save "mounted from" info for mount point (NULL pad)
                               */
                              memset(mp->mnt_stat.f_mntfromname, 0, MNAMELEN);
                              strlcpy(mp->mnt_stat.f_mntfromname, fname, MNAMELEN);
                              memset(mp->mnt_stat.f_mntfromspec, 0, MNAMELEN);
                              strlcpy(mp->mnt_stat.f_mntfromspec, fspec, MNAMELEN);
                      }
              } else {
                      /*
                       * Since this is a new mount, we want the names for
                       * the device and the mount point copied in.  If an
                       * error occurs,  the mountpoint is discarded by the
                       * upper level code.
                       */
                      memset(mp->mnt_stat.f_mntonname, 0, MNAMELEN);
                      strlcpy(mp->mnt_stat.f_mntonname, path, MNAMELEN);
                      memset(mp->mnt_stat.f_mntfromname, 0, MNAMELEN);
                      strlcpy(mp->mnt_stat.f_mntfromname, fname, MNAMELEN);
                      memset(mp->mnt_stat.f_mntfromspec, 0, MNAMELEN);
                      strlcpy(mp->mnt_stat.f_mntfromspec, fspec, MNAMELEN);
      
                      error = ffs_mountfs(devvp, mp, p);
              }
      
              if (error)
                      goto error_2;
      
              /*
               * Initialize FS stat information in mount struct; uses both
               * mp->mnt_stat.f_mntonname and mp->mnt_stat.f_mntfromname
               *
               * This code is common to root and non-root mounts
               */
              if (args)
                      memcpy(&mp->mnt_stat.mount_info.ufs_args, args, sizeof(*args));
              VFS_STATFS(mp, &mp->mnt_stat, p);
      
      success:
              if (path && (mp->mnt_flag & MNT_UPDATE)) {
                      /* Update clean flag after changing read-onlyness. */
                      fs = ump->um_fs;
                      if (ronly != fs->fs_ronly) {
                              fs->fs_ronly = ronly;
                              fs->fs_clean = ronly &&
                                  (fs->fs_flags & FS_UNCLEAN) == 0 ? 1 : 0;
                              if (ronly)
                                      free(fs->fs_contigdirs, M_UFSMNT, fs->fs_ncg);
                      }
                      if (!ronly) {
                              if (mp->mnt_flag & MNT_SOFTDEP)
                                      fs->fs_flags |= FS_DOSOFTDEP;
                              else
                                      fs->fs_flags &= ~FS_DOSOFTDEP;
                      }
                      ffs_sbupdate(ump, MNT_WAIT);
      #if 0
                      if (ronly) {
                              int force = 0;
      
                              /*
                               * Updating mount to readonly. Try a cache flush.
                               * Ignore error because the ioctl may not be supported.
                               */
                              VOP_IOCTL(ump->um_devvp, DIOCCACHESYNC, &force,
                                  FWRITE, FSCRED, p);
                }
      #endif
              }
              return (0);
      
      error_2:        /* error with devvp held */
        vrele(devvp);
      
      error_1:        /* no state to back out */
              return (error);
      }
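
/*
 * Illustrative userland sketch (not kernel code, assumptions hedged):
 * how a mount(8)-like program might reach ffs_mount() above through
 * the mount(2) system call with a struct ufs_args.  The device and
 * mount point names are hypothetical.
 */
#if 0
#include <sys/types.h>
#include <sys/mount.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
        struct ufs_args args;
        char fspec[] = "/dev/sd0a";        /* hypothetical device */

        memset(&args, 0, sizeof(args));
        args.fspec = fspec;

        if (mount(MOUNT_FFS, "/mnt", MNT_RDONLY, &args) == -1) {
                perror("mount");
                return (1);
        }
        return (0);
}
#endif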
      
      struct ffs_reload_args {
              struct fs *fs;
              struct proc *p;
              struct ucred *cred;
              struct vnode *devvp;
      };
      
      int
      ffs_reload_vnode(struct vnode *vp, void *args) 
      {
              struct ffs_reload_args *fra = args;
              struct inode *ip;
              struct buf *bp;
              int error;
      
              /*
               * Step 4: invalidate all inactive vnodes.
               */
              if (vp->v_usecount == 0) {
                      vgonel(vp, fra->p);
                      return (0);
              }
      
              /*
               * Step 5: invalidate all cached file data.
               */
              if (vget(vp, LK_EXCLUSIVE))
                      return (0);
      
              if (vinvalbuf(vp, 0, fra->cred, fra->p, 0, INFSLP))
                      panic("ffs_reload: dirty2");
      
              /*
               * Step 6: re-read inode data for all active vnodes.
               */
              ip = VTOI(vp);
      
              error = bread(fra->devvp, 
                  fsbtodb(fra->fs, ino_to_fsba(fra->fs, ip->i_number)),
                  (int)fra->fs->fs_bsize, &bp);
              if (error) {
                      brelse(bp);
                      vput(vp);
                      return (error);
              }
      
              *ip->i_din1 = *((struct ufs1_dinode *)bp->b_data +
                  ino_to_fsbo(fra->fs, ip->i_number));
              ip->i_effnlink = DIP(ip, nlink);
              brelse(bp);
              vput(vp);
              return (0);
      }
      
      /*
       * Reload all incore data for a filesystem (used after running fsck on
       * the root filesystem and finding things to fix). The filesystem must
       * be mounted read-only.
       *
       * Things to do to update the mount:
       *        1) invalidate all cached meta-data.
       *        2) re-read superblock from disk.
       *        3) re-read summary information from disk.
       *        4) invalidate all inactive vnodes.
       *        5) invalidate all cached file data.
       *        6) re-read inode data for all active vnodes.
       */
      int
      ffs_reload(struct mount *mountp, struct ucred *cred, struct proc *p)
      {
              struct vnode *devvp;
              caddr_t space;
              struct fs *fs, *newfs;
              int i, blks, size, error;
              int32_t *lp;
              struct buf *bp = NULL;
              struct ffs_reload_args fra;
      
              if ((mountp->mnt_flag & MNT_RDONLY) == 0)
                      return (EINVAL);
              /*
               * Step 1: invalidate all cached meta-data.
               */
              devvp = VFSTOUFS(mountp)->um_devvp;
              vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
              error = vinvalbuf(devvp, 0, cred, p, 0, INFSLP);
              VOP_UNLOCK(devvp);
              if (error)
                      panic("ffs_reload: dirty1");
      
              /*
               * Step 2: re-read superblock from disk.
               */
              fs = VFSTOUFS(mountp)->um_fs;
      
              error = bread(devvp, fs->fs_sblockloc / DEV_BSIZE, SBSIZE, &bp);
              if (error) {
                      brelse(bp);
                      return (error);
              }
      
              newfs = (struct fs *)bp->b_data;
              if (ffs_validate(newfs) == 0) {
                      brelse(bp);
                      return (EINVAL);
              }
      
              /*
               * Copy pointer fields back into superblock before copying in        XXX
               * new superblock. These should really be in the ufsmount.        XXX
               * Note that important parameters (eg fs_ncg) are unchanged.
               */
              newfs->fs_csp = fs->fs_csp;
              newfs->fs_maxcluster = fs->fs_maxcluster;
              newfs->fs_ronly = fs->fs_ronly;
              memcpy(fs, newfs, fs->fs_sbsize);
              if (fs->fs_sbsize < SBSIZE)
                      bp->b_flags |= B_INVAL;
              brelse(bp);
              VFSTOUFS(mountp)->um_maxsymlinklen = fs->fs_maxsymlinklen;
              ffs1_compat_read(fs, VFSTOUFS(mountp), fs->fs_sblockloc);
              ffs_oldfscompat(fs);
              (void)ffs_statfs(mountp, &mountp->mnt_stat, p);
              /*
               * Step 3: re-read summary information from disk.
               */
              blks = howmany(fs->fs_cssize, fs->fs_fsize);
              space = (caddr_t)fs->fs_csp;
              for (i = 0; i < blks; i += fs->fs_frag) {
                      size = fs->fs_bsize;
                      if (i + fs->fs_frag > blks)
                              size = (blks - i) * fs->fs_fsize;
                      error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size, &bp);
                      if (error) {
                              brelse(bp);
                              return (error);
                      }
                      memcpy(space, bp->b_data, size);
                      space += size;
                      brelse(bp);
              }
              if ((fs->fs_flags & FS_DOSOFTDEP))
                      (void) softdep_mount(devvp, mountp, fs, cred);
              /*
               * We no longer know anything about clusters per cylinder group.
               */
              if (fs->fs_contigsumsize > 0) {
                      lp = fs->fs_maxcluster;
                      for (i = 0; i < fs->fs_ncg; i++)
                              *lp++ = fs->fs_contigsumsize;
              }
      
              fra.p = p;
              fra.cred = cred;
              fra.fs = fs;
              fra.devvp = devvp;
      
              error = vfs_mount_foreach_vnode(mountp, ffs_reload_vnode, &fra);
      
              return (error);
      }
      
      /*
       * Checks if a super block is sane enough to be mounted.
       */
      int
      ffs_validate(struct fs *fsp)
      {
      #ifdef FFS2
              if (fsp->fs_magic != FS_UFS2_MAGIC && fsp->fs_magic != FS_UFS1_MAGIC)
                      return (0); /* Invalid magic */
      #else
              if (fsp->fs_magic != FS_UFS1_MAGIC)
                      return (0); /* Invalid magic */
      #endif /* FFS2 */
      
              if ((u_int)fsp->fs_bsize > MAXBSIZE)
                      return (0); /* Invalid block size */
      
              if ((u_int)fsp->fs_bsize < sizeof(struct fs))
                      return (0); /* Invalid block size */
      
              if ((u_int)fsp->fs_sbsize > SBSIZE)
                      return (0); /* Invalid super block size */
      
              if ((u_int)fsp->fs_frag > MAXFRAG || fragtbl[fsp->fs_frag] == NULL)
                      return (0); /* Invalid number of fragments */
      
              if (fsp->fs_inodefmt == FS_42INODEFMT)
                      fsp->fs_maxsymlinklen = 0;
              else if (fsp->fs_maxsymlinklen < 0)
                      return (0); /* Invalid max size of short symlink */
      
              return (1); /* Super block is okay */
      }
      
      /*
       * Possible locations for the super-block.
       */
      const int sbtry[] = SBLOCKSEARCH;
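/*
 * For reference, SBLOCKSEARCH comes from <ufs/ffs/fs.h> and typically
 * expands to { SBLOCK_UFS2, SBLOCK_UFS1, SBLOCK_FLOPPY, SBLOCK_PIGGY, -1 }
 * (65536, 8192, 0 and 262144 bytes), terminated by -1 as the loop in
 * ffs_mountfs() below assumes; the exact list is release-dependent.
 */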
      
      /*
       * Common code for mount and mountroot
       */
      int
      ffs_mountfs(struct vnode *devvp, struct mount *mp, struct proc *p)
      {
              struct ufsmount *ump;
              struct buf *bp;
              struct fs *fs;
              dev_t dev;
              caddr_t space;
              daddr_t sbloc;
              int error, i, blks, size, ronly;
              int32_t *lp;
              struct ucred *cred;
              u_int64_t maxfilesize;                                        /* XXX */
      
              dev = devvp->v_rdev;
              cred = p ? p->p_ucred : NOCRED;
              /*
               * Disallow multiple mounts of the same device.
               * Disallow mounting of a device that is currently in use
               * (except for root, which might share swap device for miniroot).
               * Flush out any old buffers remaining from a previous use.
               */
              if ((error = vfs_mountedon(devvp)) != 0)
                      return (error);
              if (vcount(devvp) > 1 && devvp != rootvp)
                      return (EBUSY);
              vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
              error = vinvalbuf(devvp, V_SAVE, cred, p, 0, INFSLP);
              VOP_UNLOCK(devvp);
              if (error)
                      return (error);
      
              ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
              error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p);
              if (error)
                      return (error);
      
              bp = NULL;
              ump = NULL;
      
              /*
               * Try reading the super-block in each of its possible locations.
               */
              for (i = 0; sbtry[i] != -1; i++) {
                      if (bp != NULL) {
                              bp->b_flags |= B_NOCACHE;
                              brelse(bp);
                              bp = NULL;
                      }
      
                      error = bread(devvp, sbtry[i] / DEV_BSIZE, SBSIZE, &bp);
                      if (error)
                              goto out;
      
                      fs = (struct fs *) bp->b_data;
                      sbloc = sbtry[i];
      
      #if 0
                      if (fs->fs_magic == FS_UFS2_MAGIC) {
                              printf("ffs_mountfs(): Sorry, no UFS2 support (yet)\n");
                              error = EFTYPE;
                              goto out;
                      }
      #endif
      
                      /*
                       * Do not look for an FFS1 file system at SBLOCK_UFS2. Doing so
                       * will find the wrong super-block for file systems with 64k
                       * block size.
                       */
                      if (fs->fs_magic == FS_UFS1_MAGIC && sbloc == SBLOCK_UFS2)
                              continue;
      
                      if (ffs_validate(fs))
                              break; /* Super block validated */
              }
      
              if (sbtry[i] == -1) {
                      error = EINVAL;
                      goto out;
              }
      
              fs->fs_fmod = 0;
              fs->fs_flags &= ~FS_UNCLEAN;
              if (fs->fs_clean == 0) {
      #if 0
                      /*
                       * It is safe to mount an unclean file system
                       * if it was previously mounted with softdep
                       * but we may lose space and must
                       * sometimes run fsck manually.
                       */
                      if (fs->fs_flags & FS_DOSOFTDEP)
                              printf(
      "WARNING: %s was not properly unmounted\n",
                                  fs->fs_fsmnt);
                      else
      #endif
                      if (ronly || (mp->mnt_flag & MNT_FORCE)) {
                              printf(
      "WARNING: %s was not properly unmounted\n",
                                  fs->fs_fsmnt);
                      } else {
                              printf(
      "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
                                  fs->fs_fsmnt);
                              error = EROFS;
                              goto out;
                      }
              }
      
              if (fs->fs_postblformat == FS_42POSTBLFMT && !ronly) {
      #ifndef SMALL_KERNEL
                      printf("ffs_mountfs(): obsolete rotational table format, "
                          "please use fsck_ffs(8) -c 1\n");
      #endif
                      error = EFTYPE;
                      goto out;
              }
      
              ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK|M_ZERO);
              ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT,
                  M_WAITOK);
      
              if (fs->fs_magic == FS_UFS1_MAGIC)
                      ump->um_fstype = UM_UFS1;
      #ifdef FFS2
              else
                      ump->um_fstype = UM_UFS2;
      #endif
      
              memcpy(ump->um_fs, bp->b_data, fs->fs_sbsize);
              if (fs->fs_sbsize < SBSIZE)
                      bp->b_flags |= B_INVAL;
              brelse(bp);
              bp = NULL;
              fs = ump->um_fs;
      
              ffs1_compat_read(fs, ump, sbloc);
      
              if (fs->fs_clean == 0)
                      fs->fs_flags |= FS_UNCLEAN;
              fs->fs_ronly = ronly;
              size = fs->fs_cssize;
              blks = howmany(size, fs->fs_fsize);
              if (fs->fs_contigsumsize > 0)
                      size += fs->fs_ncg * sizeof(int32_t);
              space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
              fs->fs_csp = (struct csum *)space;
              for (i = 0; i < blks; i += fs->fs_frag) {
                      size = fs->fs_bsize;
                      if (i + fs->fs_frag > blks)
                              size = (blks - i) * fs->fs_fsize;
                      error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size, &bp);
                      if (error) {
                              free(fs->fs_csp, M_UFSMNT, 0);
                              goto out;
                      }
                      memcpy(space, bp->b_data, size);
                      space += size;
                      brelse(bp);
                      bp = NULL;
              }
              if (fs->fs_contigsumsize > 0) {
                      fs->fs_maxcluster = lp = (int32_t *)space;
                      for (i = 0; i < fs->fs_ncg; i++)
                              *lp++ = fs->fs_contigsumsize;
              }
              mp->mnt_data = ump;
              mp->mnt_stat.f_fsid.val[0] = (long)dev;
              /* Use on-disk fsid if it exists, else fake it */
              if (fs->fs_id[0] != 0 && fs->fs_id[1] != 0)
                      mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
              else
                      mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
              mp->mnt_stat.f_namemax = MAXNAMLEN;
              mp->mnt_flag |= MNT_LOCAL;
              ump->um_mountp = mp;
              ump->um_dev = dev;
              ump->um_devvp = devvp;
              ump->um_nindir = fs->fs_nindir;
              ump->um_bptrtodb = fs->fs_fsbtodb;
              ump->um_seqinc = fs->fs_frag;
              ump->um_maxsymlinklen = fs->fs_maxsymlinklen;
              for (i = 0; i < MAXQUOTAS; i++)
                      ump->um_quotas[i] = NULLVP;
      
              devvp->v_specmountpoint = mp;
              ffs_oldfscompat(fs);
      
              if (ronly)
                      fs->fs_contigdirs = NULL;
              else {
                      fs->fs_contigdirs = malloc((u_long)fs->fs_ncg,
                          M_UFSMNT, M_WAITOK|M_ZERO);
              }
      
              /*
               * Set FS local "last mounted on" information (NULL pad)
               */
              memset(fs->fs_fsmnt, 0, sizeof(fs->fs_fsmnt));
              strlcpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, sizeof(fs->fs_fsmnt));
      
      #if 0
        if (mp->mnt_flag & MNT_ROOTFS) {
                /*
                 * Root mount; update timestamp in mount structure.
                 * This will be used by the common root mount code
                 * to update the system clock.
                 */
                mp->mnt_time = fs->fs_time;
        }
      #endif
      
        /*
         * XXX
         * Limit max file size.  Even though ffs can handle files up to 16TB,
         * we limit the maximum file size to 2^31 pages to prevent overflow
         * of a 32-bit unsigned int.  The buffer cache has its own checks
         * but a little added paranoia never hurts.
         */
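        /*
         * Worked example, assuming 4 KB pages: 2^31 pages * 2^12
         * bytes/page = 2^43 bytes, so on such a configuration the cap
         * works out to 8 TB.
         */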
              ump->um_savedmaxfilesize = fs->fs_maxfilesize;                /* XXX */
              maxfilesize = FS_KERNMAXFILESIZE(PAGE_SIZE, fs);
              if (fs->fs_maxfilesize > maxfilesize)                        /* XXX */
                      fs->fs_maxfilesize = maxfilesize;                /* XXX */
              if (ronly == 0) {
                      if ((fs->fs_flags & FS_DOSOFTDEP) &&
                          (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
                              free(fs->fs_csp, M_UFSMNT, 0);
                              free(fs->fs_contigdirs, M_UFSMNT, fs->fs_ncg);
                              goto out;
                      }
                      fs->fs_fmod = 1;
                      fs->fs_clean = 0;
                      if (mp->mnt_flag & MNT_SOFTDEP)
                              fs->fs_flags |= FS_DOSOFTDEP;
                      else
                              fs->fs_flags &= ~FS_DOSOFTDEP;
                      error = ffs_sbupdate(ump, MNT_WAIT);
                      if (error == EROFS)
                              goto out;
              }
              return (0);
      out:
              if (devvp->v_specinfo)
                      devvp->v_specmountpoint = NULL;
              if (bp)
                      brelse(bp);
      
              vn_lock(devvp, LK_EXCLUSIVE|LK_RETRY);
              (void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, p);
              VOP_UNLOCK(devvp);
      
              if (ump) {
                      free(ump->um_fs, M_UFSMNT, ump->um_fs->fs_sbsize);
                      free(ump, M_UFSMNT, sizeof(*ump));
                      mp->mnt_data = NULL;
              }
              return (error);
      }
      
      /*
       * Sanity checks for old file systems.
       */
      int
      ffs_oldfscompat(struct fs *fs)
      {
              int i;
      
              fs->fs_npsect = max(fs->fs_npsect, fs->fs_nsect);        /* XXX */
              fs->fs_interleave = max(fs->fs_interleave, 1);                /* XXX */
              if (fs->fs_postblformat == FS_42POSTBLFMT)                /* XXX */
                      fs->fs_nrpos = 8;                                /* XXX */
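        /*
         * Worked example for the maxfilesize fix-up below, assuming
         * fs_bsize = 8192 and NINDIR(fs) = 2048: the direct blocks
         * cover 12 * 8192 - 1 bytes, and each indirection level
         * multiplies sizepb by 2048, so the triple indirect level
         * dominates at 8192 * 2048^3 = 2^46 bytes (64 TB).
         */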
              if (fs->fs_inodefmt < FS_44INODEFMT) {                        /* XXX */
                      u_int64_t sizepb = fs->fs_bsize;                /* XXX */
                                                                      /* XXX */
                      fs->fs_maxfilesize = fs->fs_bsize * NDADDR - 1;        /* XXX */
                      for (i = 0; i < NIADDR; i++) {                        /* XXX */
                              sizepb *= NINDIR(fs);                        /* XXX */
                              fs->fs_maxfilesize += sizepb;                /* XXX */
                      }                                                /* XXX */
                      fs->fs_qbmask = ~fs->fs_bmask;                        /* XXX */
                      fs->fs_qfmask = ~fs->fs_fmask;                        /* XXX */
              }                                                        /* XXX */
              if (fs->fs_avgfilesize <= 0)                                /* XXX */
                      fs->fs_avgfilesize = AVFILESIZ;                        /* XXX */
              if (fs->fs_avgfpdir <= 0)                                /* XXX */
                      fs->fs_avgfpdir = AFPDIR;                        /* XXX */
              return (0);
      }
      
      /*
       * Auxiliary function for reading FFS1 super blocks.
       */
      void
      ffs1_compat_read(struct fs *fs, struct ufsmount *ump, daddr_t sbloc)
      {
              if (fs->fs_magic == FS_UFS2_MAGIC)
                      return; /* UFS2 */
      #if 0
              if (fs->fs_ffs1_flags & FS_FLAGS_UPDATED)
                      return; /* Already updated */
      #endif
              fs->fs_flags = fs->fs_ffs1_flags;
              fs->fs_sblockloc = sbloc;
              fs->fs_maxbsize = fs->fs_bsize;
              fs->fs_time = fs->fs_ffs1_time;
              fs->fs_size = fs->fs_ffs1_size;
              fs->fs_dsize = fs->fs_ffs1_dsize;
              fs->fs_csaddr = fs->fs_ffs1_csaddr;
              fs->fs_cstotal.cs_ndir = fs->fs_ffs1_cstotal.cs_ndir;
              fs->fs_cstotal.cs_nbfree = fs->fs_ffs1_cstotal.cs_nbfree;
              fs->fs_cstotal.cs_nifree = fs->fs_ffs1_cstotal.cs_nifree;
              fs->fs_cstotal.cs_nffree = fs->fs_ffs1_cstotal.cs_nffree;
              fs->fs_ffs1_flags |= FS_FLAGS_UPDATED;
      }
      
      /*
       * Auxiliary function for writing FFS1 super blocks.
       */
      void
      ffs1_compat_write(struct fs *fs, struct ufsmount *ump)
      {
              if (fs->fs_magic != FS_UFS1_MAGIC)
                      return; /* UFS2 */
      
        fs->fs_ffs1_time = fs->fs_time;
              fs->fs_ffs1_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
              fs->fs_ffs1_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
              fs->fs_ffs1_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
              fs->fs_ffs1_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
      }
      
      /*
       * unmount system call
       */
      int
      ffs_unmount(struct mount *mp, int mntflags, struct proc *p)
      {
              struct ufsmount *ump;
              struct fs *fs;
              int error, flags;
      
              flags = 0;
              if (mntflags & MNT_FORCE)
                      flags |= FORCECLOSE;
      
              ump = VFSTOUFS(mp);
              fs = ump->um_fs;
              if (mp->mnt_flag & MNT_SOFTDEP)
                      error = softdep_flushfiles(mp, flags, p);
              else
                      error = ffs_flushfiles(mp, flags, p);
              if (error != 0)
                      return (error);
      
              if (fs->fs_ronly == 0) {
                      fs->fs_clean = (fs->fs_flags & FS_UNCLEAN) ? 0 : 1;
                      error = ffs_sbupdate(ump, MNT_WAIT);
                      /* ignore write errors if mounted RW on read-only device */
                      if (error && error != EROFS) {
                              fs->fs_clean = 0;
                              return (error);
                      }
                      free(fs->fs_contigdirs, M_UFSMNT, fs->fs_ncg);
              }
              ump->um_devvp->v_specmountpoint = NULL;
      
              vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
              vinvalbuf(ump->um_devvp, V_SAVE, NOCRED, p, 0, INFSLP);
              (void)VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE,
                  NOCRED, p);
              vput(ump->um_devvp);
              free(fs->fs_csp, M_UFSMNT, 0);
              free(fs, M_UFSMNT, fs->fs_sbsize);
              free(ump, M_UFSMNT, sizeof(*ump));
              mp->mnt_data = NULL;
              mp->mnt_flag &= ~MNT_LOCAL;
              return (0);
      }
      
      /*
       * Flush out all the files in a filesystem.
       */
      int
      ffs_flushfiles(struct mount *mp, int flags, struct proc *p)
      {
              struct ufsmount *ump;
              int error;
      
              ump = VFSTOUFS(mp);
              if (mp->mnt_flag & MNT_QUOTA) {
                      int i;
                      if ((error = vflush(mp, NULLVP, SKIPSYSTEM|flags)) != 0)
                              return (error);
                      for (i = 0; i < MAXQUOTAS; i++) {
                              if (ump->um_quotas[i] == NULLVP)
                                      continue;
                              quotaoff(p, mp, i);
                      }
                      /*
                       * Here we fall through to vflush again to ensure
                       * that we have gotten rid of all the system vnodes.
                       */
              }
      
              /*
               * Flush all the files.
               */
              if ((error = vflush(mp, NULL, flags)) != 0)
                      return (error);
              /*
               * Flush filesystem metadata.
               */
              vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
              error = VOP_FSYNC(ump->um_devvp, p->p_ucred, MNT_WAIT, p);
              VOP_UNLOCK(ump->um_devvp);
              return (error);
      }
      
      /*
       * Get file system statistics.
       */
      int
      ffs_statfs(struct mount *mp, struct statfs *sbp, struct proc *p)
      {
              struct ufsmount *ump;
              struct fs *fs;
      
              ump = VFSTOUFS(mp);
              fs = ump->um_fs;
      
      #ifdef FFS2
              if (fs->fs_magic != FS_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
                      panic("ffs_statfs");
      #else
              if (fs->fs_magic != FS_MAGIC)
                      panic("ffs_statfs");
      #endif /* FFS2 */
      
              sbp->f_bsize = fs->fs_fsize;
              sbp->f_iosize = fs->fs_bsize;
              sbp->f_blocks = fs->fs_dsize;
              sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
                  fs->fs_cstotal.cs_nffree;
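        /*
         * f_bavail excludes the minfree reserve computed below; e.g.,
         * assuming the common fs_minfree of 5, one twentieth of
         * fs_dsize is held back from unprivileged allocation.
         */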
              sbp->f_bavail = sbp->f_bfree -
                  ((int64_t)fs->fs_dsize * fs->fs_minfree / 100);
              sbp->f_files = fs->fs_ncg * fs->fs_ipg - ROOTINO;
              sbp->f_ffree = fs->fs_cstotal.cs_nifree;
              sbp->f_favail = sbp->f_ffree;
              copy_statfs_info(sbp, mp);
      
              return (0);
      }
      
      struct ffs_sync_args {
              int allerror;
              struct proc *p;
              int waitfor;
              int nlink0;
              int inflight;
              struct ucred *cred;
      };
      
      int
      ffs_sync_vnode(struct vnode *vp, void *arg)
{
              struct ffs_sync_args *fsa = arg;
              struct inode *ip;
              int error, nlink0 = 0;
      
        if (vp->v_type == VNON)
                      return (0);
      
              ip = VTOI(vp);
      
        if (vp->v_inflight && !(vp->v_type == VCHR || vp->v_type == VBLK))
                fsa->inflight = MIN(fsa->inflight+1, 65536);
      
              /*
               * If unmounting or converting rw to ro, then stop deferring
               * timestamp writes.
               */
        if (fsa->waitfor == MNT_WAIT && (ip->i_flag & IN_LAZYMOD)) {
                      ip->i_flag |= IN_MODIFIED;
                      UFS_UPDATE(ip, 1);
              }
      
              if (ip->i_effnlink == 0)
                      nlink0 = 1;
      
        if ((ip->i_flag &
            (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
                  LIST_EMPTY(&vp->v_dirtyblkhd)) {
                      goto end;
              }
      
        if (vget(vp, LK_EXCLUSIVE | LK_NOWAIT)) {
                      nlink0 = 1;                /* potentially.. */
                      goto end;
              }
      
        if ((error = VOP_FSYNC(vp, fsa->cred, fsa->waitfor, fsa->p)))
                      fsa->allerror = error;
              VOP_UNLOCK(vp);
              vrele(vp);
      
      end:
              fsa->nlink0 = MIN(fsa->nlink0 + nlink0, 65536);
              return (0);
      }
      
      /*
       * Go through the disk queues to initiate sandbagged IO;
       * go through the inodes to write those that have been modified;
       * initiate the writing of the super block if it has been modified.
       *
       * Should always be called with the mount point locked.
       */
      int
      ffs_sync(struct mount *mp, int waitfor, int stall, struct ucred *cred, struct proc *p)
{
              struct ufsmount *ump = VFSTOUFS(mp);
              struct fs *fs;
              int error, allerror = 0, count, clean, fmod;
              struct ffs_sync_args fsa;
      
              fs = ump->um_fs;
              /*
               * Write back modified superblock.
               * Consistency check that the superblock
               * is still in the buffer cache.
               */
        if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {
                      printf("fs = %s\n", fs->fs_fsmnt);
                      panic("update: rofs mod");
              }
loop:
              /*
               * Write back each (modified) inode.
               */
              fsa.allerror = 0;
              fsa.p = p;
              fsa.cred = cred;
              fsa.waitfor = waitfor;
              fsa.nlink0 = 0;
              fsa.inflight = 0;
      
              /*
               * Don't traverse the vnode list if we want to skip all of them.
               */
              if (waitfor != MNT_LAZY) {
                vfs_mount_foreach_vnode(mp, ffs_sync_vnode, &fsa);
                      allerror = fsa.allerror;
              }
      
              /*
               * Force stale file system control information to be flushed.
               */
        if ((ump->um_mountp->mnt_flag & MNT_SOFTDEP) && waitfor == MNT_WAIT) {
                      if ((error = softdep_flushworklist(ump->um_mountp, &count, p)))
                              allerror = error;
                      /* Flushed work items may create new vnodes to clean */
                      if (count) 
                              goto loop;
              }
              if (waitfor != MNT_LAZY) {
                vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
                      if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) != 0)
                              allerror = error;
                      VOP_UNLOCK(ump->um_devvp);
              }
              qsync(mp);
              /*
               * Write back modified superblock.
               */
              clean = fs->fs_clean;
              fmod = fs->fs_fmod;
        if (stall && fs->fs_ronly == 0) {
                      fs->fs_fmod = 1;
                      if (allerror == 0 && fsa.nlink0 == 0 && fsa.inflight == 0) {
                              fs->fs_clean = (fs->fs_flags & FS_UNCLEAN) ? 0 : 1;
      #if 0
                              printf("%s force clean (dangling %d inflight %d)\n",
                                  mp->mnt_stat.f_mntonname, fsa.nlink0, fsa.inflight);
      #endif
                      } else {
                              fs->fs_clean = 0;
      #if 0
                              printf("%s force dirty (dangling %d inflight %d)\n",
                                  mp->mnt_stat.f_mntonname, fsa.nlink0, fsa.inflight);
      #endif
                      }
              }
        if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0)
                      allerror = error;
              fs->fs_clean = clean;
              fs->fs_fmod = fmod;
      
              return (allerror);
      }
      
      /*
       * Look up a FFS dinode number to find its incore vnode, otherwise read it
       * in from disk.  If it is in core, wait for the lock bit to clear, then
       * return the inode locked.  Detection and handling of mount points must be
       * done by the calling routine.
       */
      int
      ffs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
{
              struct fs *fs;
              struct inode *ip;
              struct ufs1_dinode *dp1;
      #ifdef FFS2
              struct ufs2_dinode *dp2;
      #endif
              struct ufsmount *ump;
              struct buf *bp;
              struct vnode *vp;
              dev_t dev;
              int error;
      
              if (ino > (ufsino_t)-1)
                      panic("ffs_vget: alien ino_t %llu", (unsigned long long)ino);
      
              ump = VFSTOUFS(mp);
              dev = ump->um_dev;
      retry:
        if ((*vpp = ufs_ihashget(dev, ino)) != NULL)
                      return (0);
      
              /* Allocate a new vnode/inode. */
              if ((error = getnewvnode(VT_UFS, mp, &ffs_vops, &vp)) != 0) {
                      *vpp = NULL;
                      return (error);
              }
      
      #ifdef VFSLCKDEBUG
              vp->v_flag |= VLOCKSWORK;
      #endif
              ip = pool_get(&ffs_ino_pool, PR_WAITOK|PR_ZERO);
              rrw_init_flags(&ip->i_lock, "inode", RWL_DUPOK | RWL_IS_VNODE);
              ip->i_ump = ump;
              vref(ip->i_devvp);
              vp->v_data = ip;
              ip->i_vnode = vp;
              ip->i_fs = fs = ump->um_fs;
              ip->i_dev = dev;
              ip->i_number = ino;
              ip->i_vtbl = &ffs_vtbl;
      
              /*
               * Put it onto its hash chain and lock it so that other requests for
               * this inode will block if they arrive while we are sleeping waiting
               * for old data structures to be purged or for the contents of the
               * disk portion of this inode to be read.
               */
              error = ufs_ihashins(ip);
              
              if (error) {
                      /*
                       * VOP_INACTIVE will treat this as a stale file
                       * and recycle it quickly
                       */
                      vrele(vp);
      
                      if (error == EEXIST)
                              goto retry;
      
                      return (error);
              }
      
      
              /* Read in the disk contents for the inode, copy into the inode. */
              error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
                  (int)fs->fs_bsize, &bp);
              if (error) {
                      /*
                       * The inode does not contain anything useful, so it would
                       * be misleading to leave it on its hash chain. With mode
                       * still zero, it will be unlinked and returned to the free
                       * list by vput().
                       */
                      vput(vp);
                      brelse(bp);
                      *vpp = NULL;
                      return (error);
              }
      
      #ifdef FFS2
              if (ip->i_ump->um_fstype == UM_UFS2) {
                      ip->i_din2 = pool_get(&ffs_dinode2_pool, PR_WAITOK);
                      dp2 = (struct ufs2_dinode *) bp->b_data + ino_to_fsbo(fs, ino);
                      *ip->i_din2 = *dp2;
              } else
      #endif
              {
                ip->i_din1 = pool_get(&ffs_dinode1_pool, PR_WAITOK);
                      dp1 = (struct ufs1_dinode *) bp->b_data + ino_to_fsbo(fs, ino);
                      *ip->i_din1 = *dp1;
              }
      
              brelse(bp);
      
              if (DOINGSOFTDEP(vp))
                      softdep_load_inodeblock(ip);
              else
                ip->i_effnlink = DIP(ip, nlink);
      
              /*
               * Initialize the vnode from the inode, check for aliases.
               * Note that the underlying vnode may have changed.
               */
              if ((error = ffs_vinit(mp, &vp)) != 0) {
                      vput(vp);
                      *vpp = NULL;
                      return (error);
              }
      
              /*
               * Set up a generation number for this inode if it does not
               * already have one. This should only happen on old filesystems.
               */
        if (DIP(ip, gen) == 0) {
                      DIP_ASSIGN(ip, gen, arc4random() & INT_MAX);
                      if (DIP(ip, gen) == 0 || DIP(ip, gen) == -1)
                              DIP_ASSIGN(ip, gen, 1);        /* Shouldn't happen */
                      if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
                              ip->i_flag |= IN_MODIFIED;
              }
      
              /*
               * Ensure that uid and gid are correct. This is a temporary
               * fix until fsck has been changed to do the update.
               */
        if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_inodefmt < FS_44INODEFMT) {
                      ip->i_ffs1_uid = ip->i_din1->di_ouid;
                      ip->i_ffs1_gid = ip->i_din1->di_ogid;
              }
      
              *vpp = vp;
      
              return (0);
      }
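
/*
 * Illustrative caller sketch (not compiled in; names are
 * hypothetical): turning an inode number from a directory entry into
 * a locked, referenced vnode via the VFS_VGET() wrapper around
 * ffs_vget() above.
 */
#if 0
static int
example_lookup_by_ino(struct mount *mp, ino_t dirent_ino)
{
        struct vnode *nvp;
        int error;

        if ((error = VFS_VGET(mp, dirent_ino, &nvp)) != 0)
                return (error);
        /* nvp comes back locked and referenced; release when done */
        vput(nvp);
        return (0);
}
#endif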
      
      /*
       * File handle to vnode
       *
       * Have to be really careful about stale file handles.
       */
      int
      ffs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp)
      {
              struct ufid *ufhp;
              int error;
      
              ufhp = (struct ufid *)fhp;
              if (ufhp->ufid_len != sizeof(*ufhp))
                      return EINVAL;
      
              if ((error = ffs_checkrange(mp, ufhp->ufid_ino)) != 0)
                      return error;
      
              return (ufs_fhtovp(mp, ufhp, vpp));
      }
      
      /*
       * Vnode pointer to File handle
       */
      int
      ffs_vptofh(struct vnode *vp, struct fid *fhp)
      {
              struct inode *ip;
              struct ufid *ufhp;
      
              ip = VTOI(vp);
              ufhp = (struct ufid *)fhp;
              ufhp->ufid_len = sizeof(struct ufid);
              ufhp->ufid_ino = ip->i_number;
              ufhp->ufid_gen = DIP(ip, gen);
      
              return (0);
      }
      
      /*
       * Write a superblock and associated information back to disk.
       */
      int
      ffs_sbupdate(struct ufsmount *mp, int waitfor)
{
              struct fs *dfs, *fs = mp->um_fs;
              struct buf *bp;
              int blks;
              caddr_t space;
              int i, size, error, allerror = 0;
      
              /*
               * First write back the summary information.
               */
              blks = howmany(fs->fs_cssize, fs->fs_fsize);
              space = (caddr_t)fs->fs_csp;
              for (i = 0; i < blks; i += fs->fs_frag) {
                      size = fs->fs_bsize;
                      if (i + fs->fs_frag > blks)
                        size = (blks - i) * fs->fs_fsize;
                bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
                    size, 0, INFSLP);
                memcpy(bp->b_data, space, size);
                space += size;
                if (waitfor != MNT_WAIT)
                        bawrite(bp);
                      else if ((error = bwrite(bp)))
                              allerror = error;
              }
      
        /*
         * Now write back the superblock itself. If any errors occurred
         * up to this point, fail so that the superblock is not written
         * out marking the file system clean.
         */
        if (allerror) {
                      return (allerror);
              }
      
              bp = getblk(mp->um_devvp,
                  fs->fs_sblockloc >> (fs->fs_fshift - fs->fs_fsbtodb),
                  (int)fs->fs_sbsize, 0, INFSLP);
              fs->fs_fmod = 0;
              fs->fs_time = time_second;
              memcpy(bp->b_data, fs, fs->fs_sbsize);
              /* Restore compatibility to old file systems.                   XXX */
              dfs = (struct fs *)bp->b_data;                                /* XXX */
        if (fs->fs_postblformat == FS_42POSTBLFMT)                /* XXX */
                dfs->fs_nrpos = -1;                                /* XXX */
        if (fs->fs_inodefmt < FS_44INODEFMT) {                        /* XXX */
                      int32_t *lp, tmp;                                /* XXX */
                                                                      /* XXX */
                      lp = (int32_t *)&dfs->fs_qbmask;                /* XXX */
                      tmp = lp[4];                                        /* XXX */
                      for (i = 4; i > 0; i--)                                /* XXX */
                              lp[i] = lp[i-1];                        /* XXX */
                      lp[0] = tmp;                                        /* XXX */
              }                                                        /* XXX */
              dfs->fs_maxfilesize = mp->um_savedmaxfilesize;                /* XXX */
      
        ffs1_compat_write(dfs, mp);

        if (waitfor != MNT_WAIT)
                bawrite(bp);
              else if ((error = bwrite(bp)))
                      allerror = error;
      
              return (allerror);
      }
      
      int
      ffs_init(struct vfsconf *vfsp)
      {
              static int done;
      
              if (done)
                      return (0);
      
              done = 1;
      
              pool_init(&ffs_ino_pool, sizeof(struct inode), 0, IPL_NONE,
                  PR_WAITOK, "ffsino", NULL);
              pool_init(&ffs_dinode1_pool, sizeof(struct ufs1_dinode), 0, IPL_NONE,
                  PR_WAITOK, "dino1pl", NULL);
      #ifdef FFS2
              pool_init(&ffs_dinode2_pool, sizeof(struct ufs2_dinode), 0, IPL_NONE,
                  PR_WAITOK, "dino2pl", NULL);
      #endif
      
              softdep_initialize();
      
              return (ufs_init(vfsp));
      }
      
      /*
       * fast filesystem related variables.
       */
      int
      ffs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
          size_t newlen, struct proc *p)
      {
      #ifdef FFS_SOFTUPDATES
              extern int max_softdeps, tickdelay, stat_worklist_push;
              extern int stat_blk_limit_push, stat_ino_limit_push, stat_blk_limit_hit;
              extern int stat_ino_limit_hit, stat_sync_limit_hit, stat_indir_blk_ptrs;
              extern int stat_inode_bitmap, stat_direct_blk_ptrs, stat_dir_entry;
      #endif
      
              /* all sysctl names at this level are terminal */
              if (namelen != 1)
                      return (ENOTDIR);                /* overloaded */
      
              switch (name[0]) {
              case FFS_CLUSTERREAD:
              case FFS_CLUSTERWRITE:
              case FFS_REALLOCBLKS:
              case FFS_ASYNCFREE:
                      return (EOPNOTSUPP);
      #ifdef FFS_SOFTUPDATES
              case FFS_MAX_SOFTDEPS:
                      return (sysctl_int(oldp, oldlenp, newp, newlen, &max_softdeps));
              case FFS_SD_TICKDELAY:
                      return (sysctl_int(oldp, oldlenp, newp, newlen, &tickdelay));
              case FFS_SD_WORKLIST_PUSH:
                      return (sysctl_rdint(oldp, oldlenp, newp, stat_worklist_push));
              case FFS_SD_BLK_LIMIT_PUSH:
                      return (sysctl_rdint(oldp, oldlenp, newp, stat_blk_limit_push));
              case FFS_SD_INO_LIMIT_PUSH:
                      return (sysctl_rdint(oldp, oldlenp, newp, stat_ino_limit_push));
              case FFS_SD_BLK_LIMIT_HIT:
                      return (sysctl_rdint(oldp, oldlenp, newp, stat_blk_limit_hit));
              case FFS_SD_INO_LIMIT_HIT:
                      return (sysctl_rdint(oldp, oldlenp, newp, stat_ino_limit_hit));
              case FFS_SD_SYNC_LIMIT_HIT:
                      return (sysctl_rdint(oldp, oldlenp, newp, stat_sync_limit_hit));
              case FFS_SD_INDIR_BLK_PTRS:
                      return (sysctl_rdint(oldp, oldlenp, newp, stat_indir_blk_ptrs));
              case FFS_SD_INODE_BITMAP:
                      return (sysctl_rdint(oldp, oldlenp, newp, stat_inode_bitmap));
              case FFS_SD_DIRECT_BLK_PTRS:
                      return (sysctl_rdint(oldp, oldlenp, newp, stat_direct_blk_ptrs));
              case FFS_SD_DIR_ENTRY:
                      return (sysctl_rdint(oldp, oldlenp, newp, stat_dir_entry));
      #endif
      #ifdef UFS_DIRHASH
              case FFS_DIRHASH_DIRSIZE:
                      return (sysctl_int(oldp, oldlenp, newp, newlen,
                          &ufs_mindirhashsize));
              case FFS_DIRHASH_MAXMEM:
                      return (sysctl_int(oldp, oldlenp, newp, newlen,
                          &ufs_dirhashmaxmem));
              case FFS_DIRHASH_MEM:
                      return (sysctl_rdint(oldp, oldlenp, newp, ufs_dirhashmem));
      #endif
      
              default:
                      return (EOPNOTSUPP);
              }
              /* NOTREACHED */
      }
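/*
 * Usage note (names assumed from the FFS_* cases above, not stated in
 * this file): the writable nodes are reachable from userland with
 * sysctl(8), e.g. something like "sysctl vfs.ffs.max_softdeps=N" to
 * tune the soft updates limit, while the stat_* counters are exported
 * read-only via sysctl_rdint(), so attempts to set them fail.
 */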
      /*        $OpenBSD: kern_acct.c,v 1.40 2019/08/15 07:29:21 anton Exp $        */
      /*        $NetBSD: kern_acct.c,v 1.42 1996/02/04 02:15:12 christos Exp $        */
      
      /*-
       * Copyright (c) 1994 Christopher G. Demetriou
       * Copyright (c) 1982, 1986, 1989, 1993
       *        The Regents of the University of California.  All rights reserved.
       * (c) UNIX System Laboratories, Inc.
       * All or some portions of this file are derived from material licensed
       * to the University of California by American Telephone and Telegraph
       * Co. or Unix System Laboratories, Inc. and are reproduced herein with
       * the permission of UNIX System Laboratories, Inc.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       * 3. Neither the name of the University nor the names of its contributors
       *    may be used to endorse or promote products derived from this software
       *    without specific prior written permission.
       *
       * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
       * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
       * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
       * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
       * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
       * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
       * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
       * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
       * SUCH DAMAGE.
       *
       *        @(#)kern_acct.c        8.1 (Berkeley) 6/14/93
       */
      
      #include <sys/param.h>
      #include <sys/systm.h>
      #include <sys/proc.h>
      #include <sys/mount.h>
      #include <sys/vnode.h>
      #include <sys/fcntl.h>
      #include <sys/syslog.h>
      #include <sys/kernel.h>
      #include <sys/namei.h>
      #include <sys/errno.h>
      #include <sys/acct.h>
      #include <sys/resourcevar.h>
      #include <sys/ioctl.h>
      #include <sys/tty.h>
      #include <sys/kthread.h>
      #include <sys/rwlock.h>
      
      #include <sys/syscallargs.h>
      
      /*
       * The routines implemented in this file are described in:
       *      Leffler, et al.: The Design and Implementation of the 4.3BSD
 *            UNIX Operating System (Addison-Wesley, 1989)
       * on pages 62-63.
       *
       * Arguably, to simplify accounting operations, this mechanism should
       * be replaced by one in which an accounting log file (similar to /dev/klog)
       * is read by a user process, etc.  However, that has its own problems.
       */
      
      /*
       * Internal accounting functions.
       */
      comp_t        encode_comp_t(u_long, u_long);
      int        acct_start(void);
      void        acct_thread(void *);
      void        acct_shutdown(void);
      
      /*
       * Accounting vnode pointer, and saved vnode pointer.
       */
      struct        vnode *acctp;
      struct        vnode *savacctp;
      
      /*
       * Lock protecting acctp and savacctp.
       */
      struct        rwlock acct_lock = RWLOCK_INITIALIZER("acctlk");
      
      /*
       * Values associated with enabling and disabling accounting
       */
      int        acctsuspend = 2;        /* stop accounting when < 2% free space left */
int	acctresume = 4;		/* resume when free space has risen above 4% */
      int        acctrate = 15;                /* delay (in seconds) between space checks */
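/*
 * Illustrative numbers (not from the original source): on a filesystem
 * reporting f_blocks = 1000, acct_thread() below suspends accounting
 * once f_bavail drops to 2% * 1000 = 20 blocks or below, and resumes
 * only after free space exceeds 4% * 1000 = 40 blocks, re-checking
 * every acctrate (15) seconds.  The gap between the two thresholds
 * gives hysteresis, so accounting does not flap around a single
 * boundary.
 */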
      
      struct proc *acct_proc;
      
      /*
       * Accounting system call.  Written based on the specification and
       * previous implementation done by Mark Tinguely.
       */
      int
      sys_acct(struct proc *p, void *v, register_t *retval)
{
              struct sys_acct_args /* {
                      syscallarg(const char *) path;
              } */ *uap = v;
              struct nameidata nd;
              int error;
      
              /* Make sure that the caller is root. */
	if ((error = suser(p)) != 0)
                      return (error);
      
              /*
               * If accounting is to be started to a file, open that file for
               * writing and make sure it's 'normal'.
               */
              if (SCARG(uap, path) != NULL) {
                      NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path),
                          p);
                      if ((error = vn_open(&nd, FWRITE|O_APPEND, 0)) != 0)
                              return (error);
                      VOP_UNLOCK(nd.ni_vp);
                      if (nd.ni_vp->v_type != VREG) {
                              vn_close(nd.ni_vp, FWRITE, p->p_ucred, p);
                              return (EACCES);
                      }
              }
      
              rw_enter_write(&acct_lock);
      
	/*
	 * If accounting was previously enabled, kill the old space-watcher,
	 * close the file, and, if no new file was specified, leave.
	 */
              if (acctp != NULL || savacctp != NULL) {
                      wakeup(&acct_proc);
                      (void)vn_close((acctp != NULL ? acctp : savacctp), FWRITE,
                          p->p_ucred, p);
                      acctp = savacctp = NULL;
              }
              if (SCARG(uap, path) == NULL)
                      goto out;
      
              /*
               * Save the new accounting file vnode, and schedule the new
               * free space watcher.
               */
              acctp = nd.ni_vp;
              if ((error = acct_start()) != 0) {
                      acctp = NULL;
                      (void)vn_close(nd.ni_vp, FWRITE, p->p_ucred, p);
              }
      
      out:
              rw_exit_write(&acct_lock);
              return (error);
      }
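/*
 * Usage note: userland reaches sys_acct() through acct(2); the
 * accton(8) utility is the usual caller, passing a file path to enable
 * accounting and a null pointer to disable it again.
 */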
      
      /*
       * Write out process accounting information, on process exit.
       * Data to be written out is specified in Leffler, et al.
       * and are enumerated below.  (They're also noted in the system
       * "acct.h" header file.)
       */
      int
      acct_process(struct proc *p)
      {
              struct acct acct;
              struct process *pr = p->p_p;
              struct rusage *r;
              struct timespec ut, st, tmp;
              int t;
              struct vnode *vp;
              int error = 0;
      
              /* If accounting isn't enabled, don't bother */
              if (acctp == NULL)
                      return (0);
      
              rw_enter_read(&acct_lock);
      
              /*
               * Check the vnode again in case accounting got disabled while waiting
               * for the lock.
               */
              vp = acctp;
              if (vp == NULL)
                      goto out;
      
              /*
               * Get process accounting information.
               */
      
              /* (1) The name of the command that ran */
              memcpy(acct.ac_comm, pr->ps_comm, sizeof acct.ac_comm);
      
              /* (2) The amount of user and system time that was used */
              calctsru(&pr->ps_tu, &ut, &st, NULL);
              acct.ac_utime = encode_comp_t(ut.tv_sec, ut.tv_nsec);
              acct.ac_stime = encode_comp_t(st.tv_sec, st.tv_nsec);
      
              /* (3) The elapsed time the command ran (and its starting time) */
              acct.ac_btime = pr->ps_start.tv_sec;
              getnanotime(&tmp);
              timespecsub(&tmp, &pr->ps_start, &tmp);
              acct.ac_etime = encode_comp_t(tmp.tv_sec, tmp.tv_nsec);
      
              /* (4) The average amount of memory used */
              r = &p->p_ru;
              timespecadd(&ut, &st, &tmp);
              t = tmp.tv_sec * hz + tmp.tv_nsec / (1000 * tick);
              if (t)
                      acct.ac_mem = (r->ru_ixrss + r->ru_idrss + r->ru_isrss) / t;
              else
                      acct.ac_mem = 0;
      
              /* (5) The number of disk I/O operations done */
              acct.ac_io = encode_comp_t(r->ru_inblock + r->ru_oublock, 0);
      
              /* (6) The UID and GID of the process */
              acct.ac_uid = pr->ps_ucred->cr_ruid;
              acct.ac_gid = pr->ps_ucred->cr_rgid;
      
              /* (7) The terminal from which the process was started */
              if ((pr->ps_flags & PS_CONTROLT) &&
                  pr->ps_pgrp->pg_session->s_ttyp)
                      acct.ac_tty = pr->ps_pgrp->pg_session->s_ttyp->t_dev;
              else
                      acct.ac_tty = NODEV;
      
              /* (8) The boolean flags that tell how the process terminated, etc. */
              acct.ac_flag = pr->ps_acflag;
      
              /*
               * Now, just write the accounting information to the file.
               */
              error = vn_rdwr(UIO_WRITE, vp, (caddr_t)&acct, sizeof (acct),
                  (off_t)0, UIO_SYSSPACE, IO_APPEND|IO_UNIT|IO_NOLIMIT,
                  p->p_ucred, NULL, p);
      
      out:
              rw_exit_read(&acct_lock);
              return (error);
      }
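/*
 * Note on step (4) above, with illustrative numbers: the divisor t is
 * the combined CPU time in clock ticks, e.g. with hz = 100 (tick =
 * 10000 us per tick) a total of 2.5 s of CPU time gives
 * t = 2 * 100 + 500000000 / (1000 * 10000) = 250.  Since the ru_i*rss
 * fields accumulate memory usage per tick, dividing by t yields the
 * average resident set over the process's run time.
 */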
      
/*
 * Encode_comp_t converts from ticks in seconds and nanoseconds
 * to ticks in 1/AHZ seconds.  The encoding is described in
 * Leffler, et al., on page 63.
 */
      
      #define        MANTSIZE        13                        /* 13 bit mantissa. */
      #define        EXPSIZE                3                        /* Base 8 (3 bit) exponent. */
      #define        MAXFRACT        ((1 << MANTSIZE) - 1)        /* Maximum fractional value. */
      
      comp_t
      encode_comp_t(u_long s, u_long ns)
      {
              int exp, rnd;
      
              exp = 0;
              rnd = 0;
              s *= AHZ;
              s += ns / (1000000000 / AHZ);        /* Maximize precision. */
      
	while (s > MAXFRACT) {
		rnd = s & (1 << (EXPSIZE - 1));	/* Round up? */
		s >>= EXPSIZE;		/* Base 8 exponent == 3 bit shift. */
		exp++;
	}
      
              /* If we need to round up, do it (and handle overflow correctly). */
              if (rnd && (++s > MAXFRACT)) {
                      s >>= EXPSIZE;
                      exp++;
              }
      
              /* Clean it up and polish it off. */
              exp <<= MANTSIZE;                /* Shift the exponent into place */
              exp += s;                        /* and add on the mantissa. */
              return (exp);
      }
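/*
 * Worked example, assuming the traditional AHZ of 64: encoding 1000
 * seconds gives s = 1000 * 64 = 64000 ticks.  64000 > MAXFRACT (8191),
 * so one EXPSIZE shift leaves s = 8000 with exp = 1, and the dropped
 * bits (64000 & 4 == 0) do not round up.  The result is
 * (1 << MANTSIZE) + 8000 = 16192, which decodes to 8000 * 8^1 = 64000
 * ticks; the format trades roughly 2^-13 relative precision for a far
 * larger range than a plain 16-bit tick count.
 */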
      
      int
      acct_start(void)
      {
              /* Already running. */
              if (acct_proc != NULL)
                      return (0);
      
              return (kthread_create(acct_thread, NULL, &acct_proc, "acct"));
      }
      
      /*
       * Periodically check the file system to see if accounting
       * should be turned on or off.  Beware the case where the vnode
       * has been vgone()'d out from underneath us, e.g. when the file
       * system containing the accounting file has been forcibly unmounted.
       */
      void
      acct_thread(void *arg)
      {
              struct statfs sb;
              struct proc *p = curproc;
      
              rw_enter_write(&acct_lock);
              for (;;) {
                      if (savacctp != NULL) {
                              if (savacctp->v_type == VBAD) {
                                      (void) vn_close(savacctp, FWRITE, NOCRED, p);
                                      savacctp = NULL;
                                      break;
                              }
                              (void)VFS_STATFS(savacctp->v_mount, &sb, NULL);
                              if (sb.f_bavail > acctresume * sb.f_blocks / 100) {
                                      acctp = savacctp;
                                      savacctp = NULL;
                                      log(LOG_NOTICE, "Accounting resumed\n");
                              }
                      } else if (acctp != NULL) {
                              if (acctp->v_type == VBAD) {
                                      (void) vn_close(acctp, FWRITE, NOCRED, p);
                                      acctp = NULL;
                                      break;
                              }
                              (void)VFS_STATFS(acctp->v_mount, &sb, NULL);
                              if (sb.f_bavail <= acctsuspend * sb.f_blocks / 100) {
                                      savacctp = acctp;
                                      acctp = NULL;
                                      log(LOG_NOTICE, "Accounting suspended\n");
                              }
                      } else {
                              break;
                      }
                      rwsleep_nsec(&acct_proc, &acct_lock, PPAUSE, "acct",
                          SEC_TO_NSEC(acctrate));
              }
              acct_proc = NULL;
              rw_exit_write(&acct_lock);
              kthread_exit(0);
      }
      
      void
      acct_shutdown(void)
{
	struct proc *p = curproc;
      
              rw_enter_write(&acct_lock);
              if (acctp != NULL || savacctp != NULL) {
                      vn_close((acctp != NULL ? acctp : savacctp), FWRITE,
                          NOCRED, p);
                      acctp = savacctp = NULL;
              }
              rw_exit_write(&acct_lock);
      }
      /*        $OpenBSD: uvm_user.c,v 1.14 2014/09/14 14:17:27 jsg Exp $        */
      /*        $NetBSD: uvm_user.c,v 1.8 2000/06/27 17:29:37 mrg Exp $        */
      
      /*
       * Copyright (c) 1997 Charles D. Cranor and Washington University.
       * All rights reserved.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       *
       * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
       * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
       * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
       * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
       * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
       * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
       * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
       * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
       * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
       * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
       *
       * from: Id: uvm_user.c,v 1.1.2.1 1997/08/14 19:10:41 chuck Exp
       */
      
      /*
       * uvm_user.c: high level uvm_allocate/uvm_deallocate interface into vm.
       */
      
      
      #include <sys/param.h>
      #include <sys/systm.h>
      
      #include <uvm/uvm.h>
      
      /*
       * uvm_deallocate: deallocate memory (unmap)
       */
      void
      uvm_deallocate(struct vm_map *map, vaddr_t start, vsize_t size)
{
      
              if (map == NULL)
                      panic("uvm_deallocate with null map");
      
              if (size == 0)
                      return;
      
	uvm_unmap(map, trunc_page(start), round_page(start+size));
      }
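/*
 * Example of the page rounding (hypothetical addresses, 4KB pages
 * assumed): uvm_deallocate(map, 0x1234, 0x2000) unmaps
 * [trunc_page(0x1234), round_page(0x3234)) = [0x1000, 0x4000),
 * i.e. every page touched by the byte range [start, start + size).
 */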
      /*        $OpenBSD: uipc_socket.c,v 1.234 2019/07/22 15:34:07 robert Exp $        */
      /*        $NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $        */
      
      /*
       * Copyright (c) 1982, 1986, 1988, 1990, 1993
       *        The Regents of the University of California.  All rights reserved.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       * 3. Neither the name of the University nor the names of its contributors
       *    may be used to endorse or promote products derived from this software
       *    without specific prior written permission.
       *
       * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
       * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
       * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
       * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
       * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
       * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
       * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
       * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
       * SUCH DAMAGE.
       *
       *        @(#)uipc_socket.c        8.3 (Berkeley) 4/15/94
       */
      
      #include <sys/param.h>
      #include <sys/systm.h>
      #include <sys/proc.h>
      #include <sys/file.h>
      #include <sys/filedesc.h>
      #include <sys/malloc.h>
      #include <sys/mbuf.h>
      #include <sys/domain.h>
      #include <sys/kernel.h>
      #include <sys/event.h>
      #include <sys/protosw.h>
      #include <sys/socket.h>
      #include <sys/unpcb.h>
      #include <sys/socketvar.h>
      #include <sys/signalvar.h>
      #include <net/if.h>
      #include <sys/pool.h>
      #include <sys/atomic.h>
      #include <sys/rwlock.h>
      
      #ifdef DDB
      #include <machine/db_machdep.h>
      #endif
      
      void        sbsync(struct sockbuf *, struct mbuf *);
      
      int        sosplice(struct socket *, int, off_t, struct timeval *);
      void        sounsplice(struct socket *, struct socket *, int);
      void        soidle(void *);
      void        sotask(void *);
      void        soreaper(void *);
      void        soput(void *);
      int        somove(struct socket *, int);
      
      void        filt_sordetach(struct knote *kn);
      int        filt_soread(struct knote *kn, long hint);
      void        filt_sowdetach(struct knote *kn);
      int        filt_sowrite(struct knote *kn, long hint);
      int        filt_solisten(struct knote *kn, long hint);
      
      struct filterops solisten_filtops =
              { 1, NULL, filt_sordetach, filt_solisten };
      struct filterops soread_filtops =
              { 1, NULL, filt_sordetach, filt_soread };
      struct filterops sowrite_filtops =
              { 1, NULL, filt_sowdetach, filt_sowrite };
      
      
      #ifndef SOMINCONN
      #define SOMINCONN 80
      #endif /* SOMINCONN */
      
      int        somaxconn = SOMAXCONN;
      int        sominconn = SOMINCONN;
      
      struct pool socket_pool;
      #ifdef SOCKET_SPLICE
      struct pool sosplice_pool;
      struct taskq *sosplice_taskq;
      struct rwlock sosplice_lock = RWLOCK_INITIALIZER("sosplicelk");
      #endif
      
      void
      soinit(void)
      {
              pool_init(&socket_pool, sizeof(struct socket), 0, IPL_SOFTNET, 0,
                  "sockpl", NULL);
      #ifdef SOCKET_SPLICE
              pool_init(&sosplice_pool, sizeof(struct sosplice), 0, IPL_SOFTNET, 0,
                  "sosppl", NULL);
      #endif
      }
      
      /*
       * Socket operation routines.
       * These routines are called by the routines in
       * sys_socket.c or from a system process, and
       * implement the semantics of socket operations by
       * switching out to the protocol specific routines.
       */
      int
      socreate(int dom, struct socket **aso, int type, int proto)
{
              struct proc *p = curproc;                /* XXX */
              const struct protosw *prp;
              struct socket *so;
              int error, s;
      
              if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == NULL || prp->pr_attach == NULL)
                      return (EPROTONOSUPPORT);
              if (prp->pr_type != type)
                      return (EPROTOTYPE);
              so = pool_get(&socket_pool, PR_WAITOK | PR_ZERO);
              sigio_init(&so->so_sigio);
              TAILQ_INIT(&so->so_q0);
              TAILQ_INIT(&so->so_q);
              so->so_type = type;
	if (suser(p) == 0)
                      so->so_state = SS_PRIV;
              so->so_ruid = p->p_ucred->cr_ruid;
              so->so_euid = p->p_ucred->cr_uid;
              so->so_rgid = p->p_ucred->cr_rgid;
              so->so_egid = p->p_ucred->cr_gid;
              so->so_cpid = p->p_p->ps_pid;
              so->so_proto = prp;
      
              s = solock(so);
              error = (*prp->pr_attach)(so, proto);
              if (error) {
		so->so_state |= SS_NOFDREF;
                      /* sofree() calls sounlock(). */
                      sofree(so, s);
                      return (error);
              }
	sounlock(so, s);
              *aso = so;
              return (0);
      }
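#if 0
/*
 * Usage sketch (hypothetical, not part of the original file): this is
 * roughly how an in-kernel consumer, compare sys_socket(), creates a
 * TCP socket.  On success *aso points at an unlocked, attached socket;
 * on failure the half-built socket has already been freed by sofree().
 */
struct socket *so;
int error;

error = socreate(AF_INET, &so, SOCK_STREAM, IPPROTO_TCP);
if (error)
	return (error);
/* ... sobind(), soconnect(), sosend() as required ... */
error = soclose(so, 0);
#endif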
      
      int
      sobind(struct socket *so, struct mbuf *nam, struct proc *p)
{
              int error;
      
              soassertlocked(so);
      
              error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, NULL, nam, NULL, p);
              return (error);
      }
      
      int
      solisten(struct socket *so, int backlog)
{
	int s, error;

	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING))
		return (EINVAL);
#ifdef SOCKET_SPLICE
	if (isspliced(so) || issplicedback(so))
		return (EOPNOTSUPP);
#endif /* SOCKET_SPLICE */
	s = solock(so);
	error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, NULL, NULL, NULL,
	    curproc);
	if (error) {
		sounlock(so, s);
		return (error);
	}
	if (TAILQ_FIRST(&so->so_q) == NULL)
		so->so_options |= SO_ACCEPTCONN;
              if (backlog < 0 || backlog > somaxconn)
                      backlog = somaxconn;
              if (backlog < sominconn)
                      backlog = sominconn;
              so->so_qlimit = backlog;
              sounlock(so, s);
              return (0);
      }
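/*
 * Backlog clamping, by example: with the usual defaults somaxconn =
 * 128 and sominconn = 80, a listen(2) backlog of -1 or 1024 becomes
 * 128, a backlog of 5 is raised to 80, and values already inside
 * [sominconn, somaxconn] pass through unchanged.
 */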
      
      #define SOSP_FREEING_READ        1
      #define SOSP_FREEING_WRITE        2
      void
      sofree(struct socket *so, int s)
{
	soassertlocked(so);

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) {
		sounlock(so, s);
		return;
	}
	if (so->so_head) {
                      /*
                       * We must not decommission a socket that's on the accept(2)
                       * queue.  If we do, then accept(2) may hang after select(2)
                       * indicated that the listening socket was ready.
                       */
                      if (!soqremque(so, 0)) {
                              sounlock(so, s);
                              return;
                      }
              }
              sigio_free(&so->so_sigio);
      #ifdef SOCKET_SPLICE
	if (so->so_sp) {
		if (issplicedback(so)) {
			int freeing = SOSP_FREEING_WRITE;

			if (so->so_sp->ssp_soback == so)
				freeing |= SOSP_FREEING_READ;
			sounsplice(so->so_sp->ssp_soback, so, freeing);
		}
		if (isspliced(so)) {
			int freeing = SOSP_FREEING_READ;
      
                              if (so == so->so_sp->ssp_socket)
                                      freeing |= SOSP_FREEING_WRITE;
                              sounsplice(so, so->so_sp->ssp_socket, freeing);
                      }
              }
      #endif /* SOCKET_SPLICE */
              sbrelease(so, &so->so_snd);
              sorflush(so);
              sounlock(so, s);
      #ifdef SOCKET_SPLICE
              if (so->so_sp) {
                      /* Reuse splice idle, sounsplice() has been called before. */
		timeout_set_proc(&so->so_sp->ssp_idleto, soreaper, so);
                      timeout_add(&so->so_sp->ssp_idleto, 0);
              } else
      #endif /* SOCKET_SPLICE */
              {
		pool_put(&socket_pool, so);
              }
      }
      
      /*
       * Close a socket on last file table reference removal.
       * Initiate disconnect if connected.
       * Free socket when disconnect complete.
       */
      int
      soclose(struct socket *so, int flags)
{
              struct socket *so2;
              int s, error = 0;
      
              s = solock(so);
              /* Revoke async IO early. There is a final revocation in sofree(). */
              sigio_free(&so->so_sigio);
	if (so->so_options & SO_ACCEPTCONN) {
		while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) {
			(void) soqremque(so2, 0);
			(void) soabort(so2);
		}
		while ((so2 = TAILQ_FIRST(&so->so_q)) != NULL) {
			(void) soqremque(so2, 1);
			(void) soabort(so2);
		}
	}
	if (so->so_pcb == NULL)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (flags & MSG_DONTWAIT))
				goto drop;
			while (so->so_state & SS_ISCONNECTED) {
                                      error = sosleep(so, &so->so_timeo,
                                          PSOCK | PCATCH, "netcls",
                                          so->so_linger * hz);
                                      if (error)
                                              break;
                              }
                      }
              }
      drop:
              if (so->so_pcb) {
                      int error2;
                      KASSERT(so->so_proto->pr_detach);
		error2 = (*so->so_proto->pr_detach)(so);
                      if (error == 0)
                              error = error2;
              }
      discard:
              if (so->so_state & SS_NOFDREF)
                      panic("soclose NOFDREF: so %p, so_type %d", so, so->so_type);
	so->so_state |= SS_NOFDREF;
              /* sofree() calls sounlock(). */
              sofree(so, s);
              return (error);
      }
      
      int
      soabort(struct socket *so)
      {
              soassertlocked(so);
      
	return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, NULL, NULL, NULL,
	    curproc);
      }
      
      int
      soaccept(struct socket *so, struct mbuf *nam)
      {
              int error = 0;
      
              soassertlocked(so);
      
              if ((so->so_state & SS_NOFDREF) == 0)
                      panic("soaccept !NOFDREF: so %p, so_type %d", so, so->so_type);
              so->so_state &= ~SS_NOFDREF;
              if ((so->so_state & SS_ISDISCONNECTED) == 0 ||
                  (so->so_proto->pr_flags & PR_ABRTACPTDIS) == 0)
                      error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT, NULL,
                          nam, NULL, curproc);
              else
                      error = ECONNABORTED;
              return (error);
      }
      
      int
      soconnect(struct socket *so, struct mbuf *nam)
{
              int error;
      
              soassertlocked(so);
      
	if (so->so_options & SO_ACCEPTCONN)
                      return (EOPNOTSUPP);
              /*
               * If protocol is connection-based, can only connect once.
               * Otherwise, if connected, try to disconnect first.
               * This allows user to disconnect by connecting to, e.g.,
               * a null address.
               */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
                  (error = sodisconnect(so))))
                      error = EISCONN;
              else
                      error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
                          NULL, nam, NULL, curproc);
              return (error);
      }
      
      int
      soconnect2(struct socket *so1, struct socket *so2)
{
              int s, error;
      
              s = solock(so1);
              error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, NULL,
                  (struct mbuf *)so2, NULL, curproc);
              sounlock(so1, s);
              return (error);
      }
      
      int
      sodisconnect(struct socket *so)
      {
              int error;
      
              soassertlocked(so);
      
              if ((so->so_state & SS_ISCONNECTED) == 0)
                      return (ENOTCONN);
              if (so->so_state & SS_ISDISCONNECTING)
                      return (EALREADY);
              error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT, NULL, NULL,
                  NULL, curproc);
              return (error);
      }
      
      int m_getuio(struct mbuf **, int, long, struct uio *);
      
      #define        SBLOCKWAIT(f)        (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
      /*
       * Send on a socket.
       * If send must go all at once and message is larger than
       * send buffering, then hard error.
       * Lock against other senders.
       * If must go all at once and not enough room now, then
       * inform user that this would block and do nothing.
       * Otherwise, if nonblocking, send as much as possible.
       * The data to be sent is described by "uio" if nonzero,
       * otherwise by the mbuf chain "top" (which must be null
       * if uio is not).  Data provided in mbuf chain must be small
       * enough to send all at once.
       *
       * Returns nonzero on error, timeout or signal; callers
       * must check for short counts if EINTR/ERESTART are returned.
       * Data and control buffers are freed on return.
       */
      int
sosend(struct socket *so, struct mbuf *addr, struct uio *uio, struct mbuf *top,
    struct mbuf *control, int flags)
{
              long space, clen = 0;
              size_t resid;
              int error, s;
              int atomic = sosendallatonce(so) || top;
      
              if (uio)
		resid = uio->uio_resid;
              else
                      resid = top->m_pkthdr.len;
              /* MSG_EOR on a SOCK_STREAM socket is invalid. */
              if (so->so_type == SOCK_STREAM && (flags & MSG_EOR)) {
                      m_freem(top);
                      m_freem(control);
                      return (EINVAL);
              }
              if (uio && uio->uio_procp)
		uio->uio_procp->p_ru.ru_msgsnd++;
	if (control) {
                      /*
                       * In theory clen should be unsigned (since control->m_len is).
                       * However, space must be signed, as it might be less than 0
                       * if we over-committed, and we must use a signed comparison
                       * of space and clen.
                       */
                      clen = control->m_len;
                      /* reserve extra space for AF_UNIX's internalize */
		if (so->so_proto->pr_domain->dom_family == AF_UNIX &&
		    clen >= CMSG_ALIGN(sizeof(struct cmsghdr)) &&
		    mtod(control, struct cmsghdr *)->cmsg_type == SCM_RIGHTS)
			clen = CMSG_SPACE(
			    (clen - CMSG_ALIGN(sizeof(struct cmsghdr))) *
			    (sizeof(struct fdpass) / sizeof(int)));
              }
      
      #define        snderr(errno)        { error = errno; goto release; }
      
              s = solock(so);
      restart:
              if ((error = sblock(so, &so->so_snd, SBLOCKWAIT(flags))) != 0)
                      goto out;
              so->so_state |= SS_ISSENDING;
              do {
		if (so->so_state & SS_CANTSENDMORE)
                              snderr(EPIPE);
                      if (so->so_error) {
                              error = so->so_error;
                              so->so_error = 0;
                              snderr(error);
                      }
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
				if (!(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0)
                                      snderr(EDESTADDRREQ);
                      }
                      space = sbspace(so, &so->so_snd);
                      if (flags & MSG_OOB)
                              space += 1024;
                      if (so->so_proto->pr_domain->dom_family == AF_UNIX) {
			if (atomic && resid > so->so_snd.sb_hiwat)
				snderr(EMSGSIZE);
		} else {
			if (clen > so->so_snd.sb_hiwat ||
			    (atomic && resid > so->so_snd.sb_hiwat - clen))
				snderr(EMSGSIZE);
		}
		if (space < clen ||
		    (space - clen < resid &&
		    (atomic || space < so->so_snd.sb_lowat))) {
			if (flags & MSG_DONTWAIT)
                                      snderr(EWOULDBLOCK);
                              sbunlock(so, &so->so_snd);
                              error = sbwait(so, &so->so_snd);
                              so->so_state &= ~SS_ISSENDING;
			if (error)
                                      goto out;
                              goto restart;
                      }
                      space -= clen;
                      do {
                              if (uio == NULL) {
                                      /*
                                       * Data is prepackaged in "top".
                                       */
                                      resid = 0;
                                      if (flags & MSG_EOR)
                                              top->m_flags |= M_EOR;
                              } else {
                                      sounlock(so, s);
                                      error = m_getuio(&top, atomic, space, uio);
                                      s = solock(so);
				if (error)
					goto release;
				space -= top->m_pkthdr.len;
				resid = uio->uio_resid;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			}
			if (resid == 0)
				so->so_state &= ~SS_ISSENDING;
			if (top && so->so_options & SO_ZEROIZE)
				top->m_flags |= M_ZEROIZE;
                              error = (*so->so_proto->pr_usrreq)(so,
                                  (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
                                  top, addr, control, curproc);
                              clen = 0;
                              control = NULL;
                              top = NULL;
			if (error)
                                      goto release;
                      } while (resid && space > 0);
	} while (resid);
      
      release:
              so->so_state &= ~SS_ISSENDING;
              sbunlock(so, &so->so_snd);
      out:
              sounlock(so, s);
              m_freem(top);
              m_freem(control);
              return (error);
      }
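#if 0
/*
 * Usage sketch (hypothetical caller, not part of the original file):
 * sending a pre-built mbuf chain on a connected socket.  With uio ==
 * NULL the data comes from "top"; sosend() consumes both "top" and
 * "control" on every return path above, so the caller must not touch
 * them again, even on error.
 */
struct socket *so = /* connected socket */ NULL;
struct mbuf *top = /* pre-built chain */ NULL;
int error;

error = sosend(so, NULL, NULL, top, NULL, MSG_DONTWAIT);
#endif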
      
      int
      m_getuio(struct mbuf **mp, int atomic, long space, struct uio *uio)
{
	struct mbuf *m, *top = NULL;
              struct mbuf **nextp = &top;
              u_long len, mlen;
              size_t resid = uio->uio_resid;
              int error;
      
              do {
                      if (top == NULL) {
			MGETHDR(m, M_WAIT, MT_DATA);
                              mlen = MHLEN;
                              m->m_pkthdr.len = 0;
                              m->m_pkthdr.ph_ifidx = 0;
                      } else {
			MGET(m, M_WAIT, MT_DATA);
                              mlen = MLEN;
                      }
                      /* chain mbuf together */
                      *nextp = m;
                      nextp = &m->m_next;
      
                      resid = ulmin(resid, space);
		if (resid >= MINCLSIZE) {
			MCLGETI(m, M_NOWAIT, NULL, ulmin(resid, MAXMCLBYTES));
			if ((m->m_flags & M_EXT) == 0)
                                      MCLGETI(m, M_NOWAIT, NULL, MCLBYTES);
                              if ((m->m_flags & M_EXT) == 0)
                                      goto nopages;
                              mlen = m->m_ext.ext_size;
                              len = ulmin(mlen, resid);
                              /*
                               * For datagram protocols, leave room
                               * for protocol headers in first mbuf.
                               */
			if (atomic && m == top && len < mlen - max_hdr)
				m->m_data += max_hdr;
                      } else {
      nopages:
                              len = ulmin(mlen, resid);
                              /*
                               * For datagram protocols, leave room
                               * for protocol headers in first mbuf.
                               */
			if (atomic && m == top && len < mlen - max_hdr)
				m_align(m, len);
                      }
      
                      error = uiomove(mtod(m, caddr_t), len, uio);
                      if (error) {
                              m_freem(top);
                              return (error);
                      }
      
                      /* adjust counters */
                      resid = uio->uio_resid;
                      space -= len;
                      m->m_len = len;
                      top->m_pkthdr.len += len;
      
                      /* Is there more space and more data? */
	} while (space > 0 && resid > 0);
      
              *mp = top;
              return 0;
      }
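/*
 * Sizing walk-through with illustrative numbers: for a 3000-byte write
 * and ample space, the first iteration allocates a packet header mbuf,
 * sees resid >= MINCLSIZE, and attaches an external cluster via
 * MCLGETI(), so the whole request fits in one mbuf.  A 100-byte write
 * stays below MINCLSIZE, takes the "nopages" path, and is copied into
 * the mbuf's internal storage (at most MHLEN bytes), with m_align()
 * leaving room for protocol headers on atomic sends.
 */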
      
      /*
       * Following replacement or removal of the first mbuf on the first
       * mbuf chain of a socket buffer, push necessary state changes back
       * into the socket buffer so that other consumers see the values
       * consistently.  'nextrecord' is the callers locally stored value of
       * the original value of sb->sb_mb->m_nextpkt which must be restored
       * when the lead mbuf changes.  NOTE: 'nextrecord' may be NULL.
       */
      void
      sbsync(struct sockbuf *sb, struct mbuf *nextrecord)
      {
      
              /*
               * First, update for the new value of nextrecord.  If necessary,
               * make it the first record.
               */
              if (sb->sb_mb != NULL)
		sb->sb_mb->m_nextpkt = nextrecord;
              else
                      sb->sb_mb = nextrecord;
      
              /*
               * Now update any dependent socket buffer fields to reflect
               * the new state.  This is an inline of SB_EMPTY_FIXUP, with
               * the addition of a second clause that takes care of the
               * case where sb_mb has been updated, but remains the last
               * record.
               */
              if (sb->sb_mb == NULL) {
                      sb->sb_mbtail = NULL;
                      sb->sb_lastrecord = NULL;
	} else if (sb->sb_mb->m_nextpkt == NULL)
		sb->sb_lastrecord = sb->sb_mb;
      }
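/*
 * Schematic example: suppose the buffer held records r1 -> r2 and the
 * caller just freed the last mbuf of r1, leaving sb_mb NULL.  sbsync()
 * then installs the cached nextrecord (r2) as the first record; if r2
 * has no successor, the second clause re-marks it as sb_lastrecord,
 * and if no record remains at all, sb_mbtail and sb_lastrecord are
 * cleared together with sb_mb.
 */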
      
      /*
       * Implement receive operations on a socket.
       * We depend on the way that records are added to the sockbuf
       * by sbappend*.  In particular, each record (mbufs linked through m_next)
       * must begin with an address if the protocol so specifies,
       * followed by an optional mbuf or mbufs containing ancillary data,
       * and then zero or more mbufs of data.
       * In order to avoid blocking network for the entire time here, we release
       * the solock() while doing the actual copy to user space.
       * Although the sockbuf is locked, new data may still be appended,
       * and thus we must maintain consistency of the sockbuf during that time.
       *
       * The caller may receive the data as a single mbuf chain by supplying
       * an mbuf **mp0 for use in returning the chain.  The uio is then used
       * only for the count in uio_resid.
       */
      int
      soreceive(struct socket *so, struct mbuf **paddr, struct uio *uio,
          struct mbuf **mp0, struct mbuf **controlp, int *flagsp,
          socklen_t controllen)
{
	struct mbuf *m, **mp;
	struct mbuf *cm;
	u_long len, offset, moff;
	int flags, error, s, type, uio_error = 0;
	const struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	size_t resid, orig_resid = uio->uio_resid;

	mp = mp0;
	if (paddr)
		*paddr = NULL;
	if (controlp)
		*controlp = NULL;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	if (flags & MSG_OOB) {
                      m = m_get(M_WAIT, MT_DATA);
                      s = solock(so);
                      error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
                          (struct mbuf *)(long)(flags & MSG_PEEK), NULL, curproc);
                      sounlock(so, s);
		if (error)
                              goto bad;
                      do {
                              error = uiomove(mtod(m, caddr_t),
                                  ulmin(uio->uio_resid, m->m_len), uio);
                              m = m_free(m);
                      } while (uio->uio_resid && error == 0 && m);
      bad:
                      m_freem(m);
                      return (error);
              }
	if (mp)
                      *mp = NULL;
      
              s = solock(so);
      restart:
              if ((error = sblock(so, &so->so_rcv, SBLOCKWAIT(flags))) != 0) {
		sounlock(so, s);
                      return (error);
              }
      
              m = so->so_rcv.sb_mb;
      #ifdef SOCKET_SPLICE
              if (isspliced(so))
                      m = NULL;
      #endif /* SOCKET_SPLICE */
              /*
               * If we have less data than requested, block awaiting more
               * (subject to any timeout) if:
               *   1. the current count is less than the low water mark,
               *   2. MSG_WAITALL is set, and it is possible to do the entire
               *        receive operation at once if we block (resid <= hiwat), or
               *   3. MSG_DONTWAIT is not set.
               * If MSG_WAITALL is set but resid is larger than the receive buffer,
               * we have to do the receive in sections, and thus risk returning
               * a short count if a timeout or signal occurs after we start.
               */
	if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == NULL && so->so_rcv.sb_cc)
      #ifdef SOCKET_SPLICE
                          if (!isspliced(so))
      #endif /* SOCKET_SPLICE */
                              panic("receive 1: so %p, so_type %d, sb_cc %lu",
                                  so, so->so_type, so->so_rcv.sb_cc);
      #endif
                      if (so->so_error) {
                              if (m)
                                      goto dontblock;
                              error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else if (so->so_rcv.sb_cc == 0)
				goto release;
		}
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
                                      m = so->so_rcv.sb_mb;
                                      goto dontblock;
                              }
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
                          (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
                              error = ENOTCONN;
                              goto release;
                      }
		if (uio->uio_resid == 0 && controlp == NULL)
                              goto release;
                      if (flags & MSG_DONTWAIT) {
                              error = EWOULDBLOCK;
                              goto release;
                      }
                      SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
                      SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
                      sbunlock(so, &so->so_rcv);
                      error = sbwait(so, &so->so_rcv);
		if (error) {
			sounlock(so, s);
                              return (error);
                      }
                      goto restart;
              }
      dontblock:
              /*
               * On entry here, m points to the first record of the socket buffer.
               * From this point onward, we maintain 'nextrecord' as a cache of the
               * pointer to the next record in the socket buffer.  We must keep the
               * various socket buffer pointers and local stack versions of the
               * pointers in sync, pushing out modifications before operations that
               * may sleep, and re-reading them afterwards.
               *
               * Otherwise, we will race with the network stack appending new data
               * or records onto the socket buffer by using inconsistent/stale
               * versions of the field, possibly resulting in socket buffer
               * corruption.
               */
              if (uio->uio_procp)
		uio->uio_procp->p_ru.ru_msgrcv++;
              KASSERT(m == so->so_rcv.sb_mb);
              SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
              SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
              nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
      #ifdef DIAGNOSTIC
                      if (m->m_type != MT_SONAME)
                              panic("receive 1a: so %p, so_type %d, m %p, m_type %d",
                                  so, so->so_type, m, m->m_type);
      #endif
                      orig_resid = 0;
                      if (flags & MSG_PEEK) {
                              if (paddr)
				*paddr = m_copym(m, 0, m->m_len, M_NOWAIT);
                              m = m->m_next;
                      } else {
			sbfree(&so->so_rcv, m);
			if (paddr) {
				*paddr = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				so->so_rcv.sb_mb = m_free(m);
				m = so->so_rcv.sb_mb;
			}
			sbsync(&so->so_rcv, nextrecord);
                      }
              }
	while (m && m->m_type == MT_CONTROL && error == 0) {
		int skip = 0;
		if (flags & MSG_PEEK) {
			if (mtod(m, struct cmsghdr *)->cmsg_type ==
                                  SCM_RIGHTS) {
                                      /* don't leak internalized SCM_RIGHTS msgs */
                                      skip = 1;
                              } else if (controlp)
                                      *controlp = m_copym(m, 0, m->m_len, M_NOWAIT);
                              m = m->m_next;
                      } else {
			sbfree(&so->so_rcv, m);
			so->so_rcv.sb_mb = m->m_next;
			m->m_nextpkt = m->m_next = NULL;
			cm = m;
			m = so->so_rcv.sb_mb;
			sbsync(&so->so_rcv, nextrecord);
			if (controlp) {
				if (pr->pr_domain->dom_externalize) {
					error =
					    (*pr->pr_domain->dom_externalize)
                                                  (cm, controllen, flags);
                                      }
                                      *controlp = cm;
                              } else {
                                      /*
                                       * Dispose of any SCM_RIGHTS message that went
                                       * through the read path rather than recv.
                                       */
                                      if (pr->pr_domain->dom_dispose)
					pr->pr_domain->dom_dispose(cm);
                                      m_free(cm);
                              }
                      }
                      if (m != NULL)
			nextrecord = so->so_rcv.sb_mb->m_nextpkt;
                      else
                              nextrecord = so->so_rcv.sb_mb;
		if (controlp && !skip) {
                              orig_resid = 0;
                              controlp = &(*controlp)->m_next;
                      }
              }
      
              /* If m is non-NULL, we have some data to read. */
              if (m) {
                      type = m->m_type;
                      if (type == MT_OOBDATA)
                              flags |= MSG_OOB;
                      if (m->m_flags & M_BCAST)
                              flags |= MSG_BCAST;
                      if (m->m_flags & M_MCAST)
                              flags |= MSG_MCAST;
              }
              SBLASTRECORDCHK(&so->so_rcv, "soreceive 2");
              SBLASTMBUFCHK(&so->so_rcv, "soreceive 2");
      
              moff = 0;
              offset = 0;
	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA) {
                              break;
                      } else if (m->m_type == MT_CONTROL) {
                              /*
                               * If there is more than one control message in the
                               * stream, we do a short read.  Next can be received
                               * or disposed by another system call.
                               */
                              break;
      #ifdef DIAGNOSTIC
		} else if (m->m_type != MT_DATA && m->m_type != MT_HEADER) {
                              panic("receive 3: so %p, so_type %d, m %p, m_type %d",
                                  so, so->so_type, m, m->m_type);
      #endif
                      }
                      so->so_state &= ~SS_RCVATMARK;
                      len = uio->uio_resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
                              len = so->so_oobmark - offset;
                      if (len > m->m_len - moff)
                              len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * The sockbuf must be consistent here (sb_mb points to the
		 * current mbuf, m_nextpkt to the next record) when we drop
		 * priority; we must note any additions to the sockbuf when
		 * we block interrupts again.
		 */
		if (mp == NULL && uio_error == 0) {
                              SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
                              SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
                              resid = uio->uio_resid;
                              sounlock(so, s);
                              uio_error = uiomove(mtod(m, caddr_t) + moff, len, uio);
                              s = solock(so);
			if (uio_error)
                                      uio->uio_resid = resid - len;
                      } else
                              uio->uio_resid -= len;
                      if (len == m->m_len - moff) {
                              if (m->m_flags & M_EOR)
                                      flags |= MSG_EOR;
                              if (flags & MSG_PEEK) {
				m = m->m_next;
                                      moff = 0;
                              } else {
                                      nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
                                      if (mp) {
                                              *mp = m;
                                              mp = &m->m_next;
                                              so->so_rcv.sb_mb = m = m->m_next;
                                              *mp = NULL;
                                      } else {
					so->so_rcv.sb_mb = m_free(m);
                                              m = so->so_rcv.sb_mb;
                                      }
                                      /*
                                       * If m != NULL, we also know that
                                       * so->so_rcv.sb_mb != NULL.
                                       */
                                      KASSERT(so->so_rcv.sb_mb == m);
                                      if (m) {
                                              m->m_nextpkt = nextrecord;
					if (nextrecord == NULL)
						so->so_rcv.sb_lastrecord = m;
                                      } else {
                                              so->so_rcv.sb_mb = nextrecord;
					SB_EMPTY_FIXUP(&so->so_rcv);
                                      }
                                      SBLASTRECORDCHK(&so->so_rcv, "soreceive 3");
                                      SBLASTMBUFCHK(&so->so_rcv, "soreceive 3");
                              }
                      } else {
                              if (flags & MSG_PEEK)
				moff += len;
                              else {
				if (mp)
                                              *mp = m_copym(m, 0, len, M_WAIT);
                                      m->m_data += len;
                                      m->m_len -= len;
                                      so->so_rcv.sb_cc -= len;
                                      so->so_rcv.sb_datacc -= len;
                              }
                      }
		if (so->so_oobmark) {
                              if ((flags & MSG_PEEK) == 0) {
                                      so->so_oobmark -= len;
                                      if (so->so_oobmark == 0) {
                                              so->so_state |= SS_RCVATMARK;
                                              break;
                                      }
                              } else {
                                      offset += len;
                                      if (offset == so->so_oobmark)
                                              break;
                              }
                      }
		if (flags & MSG_EOR)
                              break;
		/*
		 * If the MSG_WAITALL flag is set (for a non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep the sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == NULL && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
                                      break;
                              SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2");
                              SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2");
                              error = sbwait(so, &so->so_rcv);
                              if (error) {
                                      sbunlock(so, &so->so_rcv);
                                      sounlock(so, s);
                                      return (0);
                              }
			if ((m = so->so_rcv.sb_mb) != NULL)
				nextrecord = m->m_nextpkt;
                      }
              }
      
	if (m && pr->pr_flags & PR_ATOMIC) {
                      flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
              }
	if ((flags & MSG_PEEK) == 0) {
		if (m == NULL) {
                              /*
                               * First part is an inline SB_EMPTY_FIXUP().  Second
                               * part makes sure sb_lastrecord is up-to-date if
                               * there is still data in the socket buffer.
                               */
                              so->so_rcv.sb_mb = nextrecord;
                              if (so->so_rcv.sb_mb == NULL) {
				so->so_rcv.sb_mbtail = NULL;
                                      so->so_rcv.sb_lastrecord = NULL;
			} else if (nextrecord->m_nextpkt == NULL)
				so->so_rcv.sb_lastrecord = nextrecord;
                      }
                      SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
                      SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreq)(so, PRU_RCVD, NULL,
                                  (struct mbuf *)(long)flags, NULL, curproc);
              }
	if (orig_resid == uio->uio_resid && orig_resid &&
                  (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(so, &so->so_rcv);
                      goto restart;
              }
      
              if (uio_error)
                      error = uio_error;
      
              if (flagsp)
		*flagsp |= flags;
      release:
              sbunlock(so, &so->so_rcv);
              sounlock(so, s);
              return (error);
      }
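
/*
 * Example: a minimal userland sketch of the receive semantics
 * implemented above.  MSG_WAITALL makes soreceive() sleep in sbwait()
 * until the full request is satisfied (a signal or timeout can still
 * yield a short count, as noted in the loop above), while MSG_PEEK
 * leaves the data in the receive buffer.  The descriptor and sizes
 * are hypothetical.
 */
#if 0	/* illustrative userland sketch; not compiled into the kernel */
#include <sys/types.h>
#include <sys/socket.h>

ssize_t
read_exactly(int s, void *buf, size_t len)
{
	/* Block until len bytes arrive, EOF, or an error. */
	return recv(s, buf, len, MSG_WAITALL);
}

ssize_t
peek_some(int s, void *buf, size_t len)
{
	/* Inspect pending data without consuming it. */
	return recv(s, buf, len, MSG_PEEK);
}
#endif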
      
      int
      soshutdown(struct socket *so, int how)
{
              const struct protosw *pr = so->so_proto;
              int s, error = 0;
      
              s = solock(so);
	switch (how) {
              case SHUT_RD:
		sorflush(so);
                      break;
              case SHUT_RDWR:
		sorflush(so);
                      /* FALLTHROUGH */
              case SHUT_WR:
                      error = (*pr->pr_usrreq)(so, PRU_SHUTDOWN, NULL, NULL, NULL,
                          curproc);
                      break;
              default:
                      error = EINVAL;
                      break;
              }
              sounlock(so, s);
      
              return (error);
      }
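
/*
 * Example: shutdown(2) maps directly onto the switch above: SHUT_RD
 * flushes and disables the receive side via sorflush(), SHUT_WR hands
 * PRU_SHUTDOWN to the protocol, and SHUT_RDWR does both.  A minimal
 * userland sketch:
 */
#if 0	/* illustrative userland sketch; not compiled into the kernel */
#include <sys/socket.h>

/* Signal EOF to the peer while still being able to read its reply. */
int
half_close(int s)
{
	return shutdown(s, SHUT_WR);
}
#endif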
      
      void
      sorflush(struct socket *so)
{
              struct sockbuf *sb = &so->so_rcv;
              const struct protosw *pr = so->so_proto;
              struct socket aso;
              int error;
      
              sb->sb_flags |= SB_NOINTR;
              error = sblock(so, sb, M_WAITOK);
              /* with SB_NOINTR and M_WAITOK sblock() must not fail */
              KASSERT(error == 0);
              socantrcvmore(so);
              sbunlock(so, sb);
              aso.so_proto = pr;
              aso.so_rcv = *sb;
              memset(&sb->sb_startzero, 0,
                   (caddr_t)&sb->sb_endzero - (caddr_t)&sb->sb_startzero);
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(aso.so_rcv.sb_mb);
              sbrelease(&aso, &aso.so_rcv);
      }
      
      #ifdef SOCKET_SPLICE
      
      #define so_splicelen        so_sp->ssp_len
      #define so_splicemax        so_sp->ssp_max
      #define so_idletv        so_sp->ssp_idletv
      #define so_idleto        so_sp->ssp_idleto
      #define so_splicetask        so_sp->ssp_task
      
      int
      sosplice(struct socket *so, int fd, off_t max, struct timeval *tv)
{
              struct file        *fp;
              struct socket        *sosp;
	struct sosplice	*sp;
              struct taskq        *tq;
              int                 error = 0;
      
              soassertlocked(so);
      
	if (sosplice_taskq == NULL) {
                      rw_enter_write(&sosplice_lock);
                      if (sosplice_taskq == NULL) {
                              tq = taskq_create("sosplice", 1, IPL_SOFTNET,
                                  TASKQ_MPSAFE);
                              /* Ensure the taskq is fully visible to other CPUs. */
                              membar_producer();
                              sosplice_taskq = tq;
                      }
                      rw_exit_write(&sosplice_lock);
              }
              if (sosplice_taskq == NULL)
                      return (ENOMEM);
      
	if ((so->so_proto->pr_flags & PR_SPLICE) == 0)
		return (EPROTONOSUPPORT);
	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED))
		return (ENOTCONN);
	if (so->so_sp == NULL) {
                      sp = pool_get(&sosplice_pool, PR_WAITOK | PR_ZERO);
                      if (so->so_sp == NULL)
                              so->so_sp = sp;
                      else
                              pool_put(&sosplice_pool, sp);
              }
      
              /* If no fd is given, unsplice by removing existing link. */
              if (fd < 0) {
                      /* Lock receive buffer. */
                      if ((error = sblock(so, &so->so_rcv, M_WAITOK)) != 0) {
                              return (error);
                      }
		if (so->so_sp->ssp_socket)
			sounsplice(so, so->so_sp->ssp_socket, 0);
                      sbunlock(so, &so->so_rcv);
                      return (0);
              }
      
	if (max && max < 0)
                      return (EINVAL);
      
	if (tv && (tv->tv_sec < 0 || tv->tv_usec < 0))
                      return (EINVAL);
      
	/* Find sosp, the drain socket into which data will be spliced. */
	if ((error = getsock(curproc, fd, &fp)) != 0)
                      return (error);
              sosp = fp->f_data;
	if (sosp->so_sp == NULL) {
                      sp = pool_get(&sosplice_pool, PR_WAITOK | PR_ZERO);
                      if (sosp->so_sp == NULL)
                              sosp->so_sp = sp;
                      else
                              pool_put(&sosplice_pool, sp);
              }
      
              /* Lock both receive and send buffer. */
              if ((error = sblock(so, &so->so_rcv, M_WAITOK)) != 0) {
                      goto frele;
              }
              if ((error = sblock(so, &sosp->so_snd, M_WAITOK)) != 0) {
                      sbunlock(so, &so->so_rcv);
                      goto frele;
              }
      
	if (so->so_sp->ssp_socket || sosp->so_sp->ssp_soback) {
                      error = EBUSY;
                      goto release;
              }
	if (sosp->so_proto->pr_usrreq != so->so_proto->pr_usrreq) {
                      error = EPROTONOSUPPORT;
                      goto release;
              }
	if (sosp->so_options & SO_ACCEPTCONN) {
                      error = EOPNOTSUPP;
                      goto release;
              }
	if ((sosp->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0) {
                      error = ENOTCONN;
                      goto release;
              }
      
              /* Splice so and sosp together. */
              so->so_sp->ssp_socket = sosp;
              sosp->so_sp->ssp_soback = so;
              so->so_splicelen = 0;
              so->so_splicemax = max;
              if (tv)
                      so->so_idletv = *tv;
              else
		timerclear(&so->so_idletv);
              timeout_set_proc(&so->so_idleto, soidle, so);
              task_set(&so->so_splicetask, sotask, so);
      
              /*
               * To prevent softnet interrupt from calling somove() while
               * we sleep, the socket buffers are not marked as spliced yet.
               */
	if (somove(so, M_WAIT)) {
		so->so_rcv.sb_flags |= SB_SPLICE;
                      sosp->so_snd.sb_flags |= SB_SPLICE;
              }
      
       release:
              sbunlock(sosp, &sosp->so_snd);
              sbunlock(so, &so->so_rcv);
       frele:
	FRELE(fp, curproc);
              return (error);
      }
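
/*
 * Example: userland reaches sosplice() through the SO_SPLICE socket
 * option (see sosetopt() below).  A minimal sketch splicing socket
 * `from` into drain socket `to`; the struct splice fields correspond
 * to the fd, max and tv arguments above.
 */
#if 0	/* illustrative userland sketch; not compiled into the kernel */
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <string.h>

int
splice_sockets(int from, int to)
{
	struct splice sp;

	memset(&sp, 0, sizeof(sp));
	sp.sp_fd = to;			/* drain socket */
	sp.sp_max = 0;			/* 0: no byte limit */
	timerclear(&sp.sp_idle);	/* no idle timeout */
	return setsockopt(from, SOL_SOCKET, SO_SPLICE, &sp, sizeof(sp));
}

int
unsplice(int from)
{
	/* A negative fd (or a NULL optval) dissolves an existing splice. */
	int fd = -1;

	return setsockopt(from, SOL_SOCKET, SO_SPLICE, &fd, sizeof(fd));
}
#endif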
      
      void
      sounsplice(struct socket *so, struct socket *sosp, int freeing)
{
              soassertlocked(so);
      
              task_del(sosplice_taskq, &so->so_splicetask);
              timeout_del(&so->so_idleto);
              sosp->so_snd.sb_flags &= ~SB_SPLICE;
              so->so_rcv.sb_flags &= ~SB_SPLICE;
              so->so_sp->ssp_socket = sosp->so_sp->ssp_soback = NULL;
              /* Do not wakeup a socket that is about to be freed. */
	if ((freeing & SOSP_FREEING_READ) == 0 && soreadable(so))
                      sorwakeup(so);
	if ((freeing & SOSP_FREEING_WRITE) == 0 && sowriteable(sosp))
		sowwakeup(sosp);
      }
      
      void
      soidle(void *arg)
      {
              struct socket *so = arg;
              int s;
      
              s = solock(so);
              if (so->so_rcv.sb_flags & SB_SPLICE) {
                      so->so_error = ETIMEDOUT;
                      sounsplice(so, so->so_sp->ssp_socket, 0);
              }
              sounlock(so, s);
      }
      
      void
      sotask(void *arg)
      {
              struct socket *so = arg;
              int s;
      
              s = solock(so);
              if (so->so_rcv.sb_flags & SB_SPLICE) {
                      /*
                       * We may not sleep here as sofree() and unsplice() may be
                       * called from softnet interrupt context.  This would remove
                       * the socket during somove().
                       */
                      somove(so, M_DONTWAIT);
              }
              sounlock(so, s);
      
              /* Avoid user land starvation. */
              yield();
      }
      
/*
 * The socket splicing task or idle timeout may sleep while grabbing the net
 * lock.  As sofree() can be called anytime, sotask() or soidle() could access
 * the socket memory of a freed socket after wakeup.  So delay the pool_put()
 * until all pending socket splicing tasks or timeouts have finished.  Do this
 * by scheduling it on the same thread.
 */
      void
      soreaper(void *arg)
      {
              struct socket *so = arg;
      
              /* Reuse splice task, sounsplice() has been called before. */
              task_set(&so->so_sp->ssp_task, soput, so);
              task_add(sosplice_taskq, &so->so_sp->ssp_task);
      }
      
      void
      soput(void *arg)
      {
              struct socket *so = arg;
      
              pool_put(&sosplice_pool, so->so_sp);
              pool_put(&socket_pool, so);
      }
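
/*
 * Sketch of the ordering argument, assuming the single worker thread of
 * sosplice_taskq runs queued tasks in FIFO order: the task queued by
 * soreaper() cannot start before a previously queued sotask() has
 * returned, so soput() releases so_sp and the socket only after the
 * last task that could still touch them has finished.
 */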
      
/*
 * Move data from the receive buffer of the spliced source socket to the
 * send buffer of the drain socket.  Try to move as much as possible in
 * one big chunk.  This is a TCP-only implementation.
 * A return value of 0 means splicing has finished; 1 means continue.
 */
      int
      somove(struct socket *so, int wait)
{
              struct socket        *sosp = so->so_sp->ssp_socket;
              struct mbuf        *m, **mp, *nextrecord;
              u_long                 len, off, oobmark;
              long                 space;
              int                 error = 0, maxreached = 0;
              unsigned int         state;
      
              soassertlocked(so);
      
       nextpkt:
	if (so->so_error) {
                      error = so->so_error;
                      goto release;
              }
	if (sosp->so_state & SS_CANTSENDMORE) {
                      error = EPIPE;
                      goto release;
              }
	if (sosp->so_error && sosp->so_error != ETIMEDOUT &&
                  sosp->so_error != EFBIG && sosp->so_error != ELOOP) {
                      error = sosp->so_error;
                      goto release;
              }
	if ((sosp->so_state & SS_ISCONNECTED) == 0)
                      goto release;
      
              /* Calculate how many bytes can be copied now. */
              len = so->so_rcv.sb_datacc;
	if (so->so_splicemax) {
                      KASSERT(so->so_splicelen < so->so_splicemax);
		if (so->so_splicemax <= so->so_splicelen + len) {
                              len = so->so_splicemax - so->so_splicelen;
                              maxreached = 1;
                      }
              }
              space = sbspace(sosp, &sosp->so_snd);
	if (so->so_oobmark && so->so_oobmark < len &&
                  so->so_oobmark < space + 1024)
                      space += 1024;
              if (space <= 0) {
                      maxreached = 0;
                      goto release;
              }
	if (space < len) {
                      maxreached = 0;
                      if (space < sosp->so_snd.sb_lowat)
                              goto release;
                      len = space;
              }
              sosp->so_state |= SS_ISSENDING;
      
              SBLASTRECORDCHK(&so->so_rcv, "somove 1");
              SBLASTMBUFCHK(&so->so_rcv, "somove 1");
              m = so->so_rcv.sb_mb;
	if (m == NULL)
                      goto release;
              nextrecord = m->m_nextpkt;
      
              /* Drop address and control information not used with splicing. */
              if (so->so_proto->pr_flags & PR_ADDR) {
      #ifdef DIAGNOSTIC
                      if (m->m_type != MT_SONAME)
                              panic("somove soname: so %p, so_type %d, m %p, "
                                  "m_type %d", so, so->so_type, m, m->m_type);
      #endif
                      m = m->m_next;
              }
              while (m && m->m_type == MT_CONTROL)
                      m = m->m_next;
              if (m == NULL) {
                      sbdroprecord(&so->so_rcv);
                      if (so->so_proto->pr_flags & PR_WANTRCVD && so->so_pcb)
                              (so->so_proto->pr_usrreq)(so, PRU_RCVD, NULL,
                                  NULL, NULL, NULL);
                      goto nextpkt;
              }
      
	/*
	 * By splicing sockets connected to localhost, userland might create a
	 * loop.  Dissolve the splicing with an error if a loop is detected
	 * by the counter.
	 */
              if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.ph_loopcnt++ >= M_MAXLOOP) {
                      error = ELOOP;
                      goto release;
              }
      
              if (so->so_proto->pr_flags & PR_ATOMIC) {
                      if ((m->m_flags & M_PKTHDR) == 0)
                              panic("somove !PKTHDR: so %p, so_type %d, m %p, "
                                  "m_type %d", so, so->so_type, m, m->m_type);
                      if (sosp->so_snd.sb_hiwat < m->m_pkthdr.len) {
                              error = EMSGSIZE;
                              goto release;
                      }
                      if (len < m->m_pkthdr.len)
                              goto release;
                      if (m->m_pkthdr.len < len) {
                              maxreached = 0;
                              len = m->m_pkthdr.len;
                      }
                      /*
                       * Throw away the name mbuf after it has been assured
                       * that the whole first record can be processed.
                       */
                      m = so->so_rcv.sb_mb;
                      sbfree(&so->so_rcv, m);
                      so->so_rcv.sb_mb = m_free(m);
                      sbsync(&so->so_rcv, nextrecord);
              }
              /*
               * Throw away the control mbufs after it has been assured
               * that the whole first record can be processed.
               */
              m = so->so_rcv.sb_mb;
              while (m && m->m_type == MT_CONTROL) {
                      sbfree(&so->so_rcv, m);
                      so->so_rcv.sb_mb = m_free(m);
                      m = so->so_rcv.sb_mb;
                      sbsync(&so->so_rcv, nextrecord);
              }
      
              SBLASTRECORDCHK(&so->so_rcv, "somove 2");
              SBLASTMBUFCHK(&so->so_rcv, "somove 2");
      
              /* Take at most len mbufs out of receive buffer. */
              for (off = 0, mp = &m; off <= len && *mp;
                  off += (*mp)->m_len, mp = &(*mp)->m_next) {
                      u_long size = len - off;
      
      #ifdef DIAGNOSTIC
                      if ((*mp)->m_type != MT_DATA && (*mp)->m_type != MT_HEADER)
                              panic("somove type: so %p, so_type %d, m %p, "
                                  "m_type %d", so, so->so_type, *mp, (*mp)->m_type);
      #endif
                      if ((*mp)->m_len > size) {
                              /*
                               * Move only a partial mbuf at maximum splice length or
                               * if the drain buffer is too small for this large mbuf.
                               */
                              if (!maxreached && so->so_snd.sb_datacc > 0) {
                                      len -= size;
                                      break;
                              }
                              *mp = m_copym(so->so_rcv.sb_mb, 0, size, wait);
                              if (*mp == NULL) {
                                      len -= size;
                                      break;
                              }
                              so->so_rcv.sb_mb->m_data += size;
                              so->so_rcv.sb_mb->m_len -= size;
                              so->so_rcv.sb_cc -= size;
                              so->so_rcv.sb_datacc -= size;
                      } else {
                              *mp = so->so_rcv.sb_mb;
                              sbfree(&so->so_rcv, *mp);
                              so->so_rcv.sb_mb = (*mp)->m_next;
                              sbsync(&so->so_rcv, nextrecord);
                      }
              }
              *mp = NULL;
      
              SBLASTRECORDCHK(&so->so_rcv, "somove 3");
              SBLASTMBUFCHK(&so->so_rcv, "somove 3");
              SBCHECK(&so->so_rcv);
              if (m == NULL)
                      goto release;
              m->m_nextpkt = NULL;
              if (m->m_flags & M_PKTHDR) {
                      m_resethdr(m);
                      m->m_pkthdr.len = len;
              }
      
              /* Send window update to source peer as receive buffer has changed. */
              if (so->so_proto->pr_flags & PR_WANTRCVD && so->so_pcb)
                      (so->so_proto->pr_usrreq)(so, PRU_RCVD, NULL,
                          NULL, NULL, NULL);
      
              /* Receive buffer did shrink by len bytes, adjust oob. */
              state = so->so_state;
              so->so_state &= ~SS_RCVATMARK;
              oobmark = so->so_oobmark;
              so->so_oobmark = oobmark > len ? oobmark - len : 0;
              if (oobmark) {
                      if (oobmark == len)
                              so->so_state |= SS_RCVATMARK;
                      if (oobmark >= len)
                              oobmark = 0;
              }
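	/*
	 * For example: moving len = 5 with the mark at oobmark = 2 leaves
	 * so_oobmark = 0 and the local oobmark = 2, i.e. the urgent byte
	 * lies inside this chunk and is handled by the loop below (when
	 * SO_OOBINLINE is set).  With oobmark = 5 (== len) the mark is hit
	 * exactly: SS_RCVATMARK is set and the local oobmark is cleared.
	 * With oobmark = 8 (> len) the mark is still ahead: so_oobmark
	 * becomes 3 and the local oobmark is cleared.
	 */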
      
              /*
               * Handle oob data.  If any malloc fails, ignore error.
               * TCP urgent data is not very reliable anyway.
               */
              while (((state & SS_RCVATMARK) || oobmark) &&
                  (so->so_options & SO_OOBINLINE)) {
                      struct mbuf *o = NULL;
      
                      if (state & SS_RCVATMARK) {
                              o = m_get(wait, MT_DATA);
                              state &= ~SS_RCVATMARK;
                      } else if (oobmark) {
                              o = m_split(m, oobmark, wait);
                              if (o) {
                                      error = (*sosp->so_proto->pr_usrreq)(sosp,
                                          PRU_SEND, m, NULL, NULL, NULL);
                                      if (error) {
                                              if (sosp->so_state & SS_CANTSENDMORE)
                                                      error = EPIPE;
                                              m_freem(o);
                                              goto release;
                                      }
                                      len -= oobmark;
                                      so->so_splicelen += oobmark;
                                      m = o;
                                      o = m_get(wait, MT_DATA);
                              }
                              oobmark = 0;
                      }
                      if (o) {
                              o->m_len = 1;
                              *mtod(o, caddr_t) = *mtod(m, caddr_t);
                              error = (*sosp->so_proto->pr_usrreq)(sosp, PRU_SENDOOB,
                                  o, NULL, NULL, NULL);
                              if (error) {
                                      if (sosp->so_state & SS_CANTSENDMORE)
                                              error = EPIPE;
                                      m_freem(m);
                                      goto release;
                              }
                              len -= 1;
                              so->so_splicelen += 1;
                              if (oobmark) {
                                      oobmark -= 1;
                                      if (oobmark == 0)
                                              state |= SS_RCVATMARK;
                              }
                              m_adj(m, 1);
                      }
              }
      
              /* Append all remaining data to drain socket. */
              if (so->so_rcv.sb_cc == 0 || maxreached)
                      sosp->so_state &= ~SS_ISSENDING;
              error = (*sosp->so_proto->pr_usrreq)(sosp, PRU_SEND, m, NULL, NULL,
                  NULL);
              if (error) {
                      if (sosp->so_state & SS_CANTSENDMORE)
                              error = EPIPE;
                      goto release;
              }
              so->so_splicelen += len;
      
              /* Move several packets if possible. */
              if (!maxreached && nextrecord)
                      goto nextpkt;
      
       release:
              sosp->so_state &= ~SS_ISSENDING;
              if (!error && maxreached && so->so_splicemax == so->so_splicelen)
                      error = EFBIG;
	if (error)
                      so->so_error = error;
	if (((so->so_state & SS_CANTRCVMORE) && so->so_rcv.sb_cc == 0) ||
	    (sosp->so_state & SS_CANTSENDMORE) || maxreached || error) {
                      sounsplice(so, sosp, 0);
                      return (0);
              }
	if (timerisset(&so->so_idletv))
                      timeout_add_tv(&so->so_idleto, &so->so_idletv);
              return (1);
      }
      
      #endif /* SOCKET_SPLICE */
      
      void
      sorwakeup(struct socket *so)
{
              soassertlocked(so);
      
      #ifdef SOCKET_SPLICE
	if (so->so_rcv.sb_flags & SB_SPLICE) {
		/*
		 * TCP has a send buffer that can handle multiple packets
		 * at once.  So queue the stream a bit to accumulate data.
		 * The sosplice thread will call somove() later and send
		 * the packets calling tcp_output() only once.
		 * In the UDP case, send out the packets immediately.
		 * Using a thread would make things slower.
		 */
                      if (so->so_proto->pr_flags & PR_WANTRCVD)
			task_add(sosplice_taskq, &so->so_splicetask);
                      else
			somove(so, M_DONTWAIT);
              }
	if (isspliced(so))
                      return;
      #endif
              sowakeup(so, &so->so_rcv);
	if (so->so_upcall)
                      (*(so->so_upcall))(so, so->so_upcallarg, M_DONTWAIT);
      }
      
      void
      sowwakeup(struct socket *so)
{
              soassertlocked(so);
      
      #ifdef SOCKET_SPLICE
	if (so->so_snd.sb_flags & SB_SPLICE)
		task_add(sosplice_taskq, &so->so_sp->ssp_soback->so_splicetask);
	if (issplicedback(so))
                      return;
      #endif
              sowakeup(so, &so->so_snd);
      }
      
      int
      sosetopt(struct socket *so, int level, int optname, struct mbuf *m)
{
              int error = 0;
      
              soassertlocked(so);
      
              if (level != SOL_SOCKET) {
		if (so->so_proto->pr_ctloutput) {
			error = (*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so,
                                  level, optname, m);
                              return (error);
                      }
                      error = ENOPROTOOPT;
              } else {
		switch (optname) {
                      case SO_BINDANY:
			if ((error = suser(curproc)) != 0)	/* XXX */
                                      return (error);
                              break;
                      }
      
                      switch (optname) {
      
                      case SO_LINGER:
			if (m == NULL || m->m_len != sizeof (struct linger) ||
			    mtod(m, struct linger *)->l_linger < 0 ||
			    mtod(m, struct linger *)->l_linger > SHRT_MAX)
				return (EINVAL);
			so->so_linger = mtod(m, struct linger *)->l_linger;
                              /* FALLTHROUGH */
      
                      case SO_BINDANY:
                      case SO_DEBUG:
                      case SO_KEEPALIVE:
                      case SO_USELOOPBACK:
                      case SO_BROADCAST:
                      case SO_REUSEADDR:
                      case SO_REUSEPORT:
                      case SO_OOBINLINE:
                      case SO_TIMESTAMP:
                      case SO_ZEROIZE:
			if (m == NULL || m->m_len < sizeof (int))
                                      return (EINVAL);
                              if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
                              break;
      
                      case SO_DONTROUTE:
			if (m == NULL || m->m_len < sizeof (int))
                                      return (EINVAL);
                              if (*mtod(m, int *))
                                      error = EOPNOTSUPP;
                              break;
      
                      case SO_SNDBUF:
                      case SO_RCVBUF:
                      case SO_SNDLOWAT:
                      case SO_RCVLOWAT:
                          {
                              u_long cnt;
      
			if (m == NULL || m->m_len < sizeof (int))
                                      return (EINVAL);
                              cnt = *mtod(m, int *);
                              if ((long)cnt <= 0)
                                      cnt = 1;
                              switch (optname) {
      
                              case SO_SNDBUF:
				if (so->so_state & SS_CANTSENDMORE)
					return (EINVAL);
				if (sbcheckreserve(cnt, so->so_snd.sb_wat) ||
				    sbreserve(so, &so->so_snd, cnt))
					return (ENOBUFS);
				so->so_snd.sb_wat = cnt;
                                      break;
      
                              case SO_RCVBUF:
				if (so->so_state & SS_CANTRCVMORE)
					return (EINVAL);
				if (sbcheckreserve(cnt, so->so_rcv.sb_wat) ||
				    sbreserve(so, &so->so_rcv, cnt))
					return (ENOBUFS);
				so->so_rcv.sb_wat = cnt;
                                      break;
      
                              case SO_SNDLOWAT:
                                      so->so_snd.sb_lowat =
				    (cnt > so->so_snd.sb_hiwat) ?
                                          so->so_snd.sb_hiwat : cnt;
                                      break;
                              case SO_RCVLOWAT:
                                      so->so_rcv.sb_lowat =
				    (cnt > so->so_rcv.sb_hiwat) ?
                                          so->so_rcv.sb_hiwat : cnt;
                                      break;
                              }
                              break;
                          }
      
                      case SO_SNDTIMEO:
                      case SO_RCVTIMEO:
                          {
                              struct timeval tv;
                              int val;
      
			if (m == NULL || m->m_len < sizeof (tv))
                                      return (EINVAL);
                              memcpy(&tv, mtod(m, struct timeval *), sizeof tv);
                              val = tvtohz(&tv);
			if (val > USHRT_MAX)
                                      return (EDOM);
      
                              switch (optname) {
      
                              case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
                                      break;
                              }
                              break;
                          }
      
                      case SO_RTABLE:
			if (so->so_proto->pr_domain &&
			    so->so_proto->pr_domain->dom_protosw &&
			    so->so_proto->pr_ctloutput) {
				struct domain *dom = so->so_proto->pr_domain;
      
                                      level = dom->dom_protosw->pr_protocol;
                                      error = (*so->so_proto->pr_ctloutput)
                                          (PRCO_SETOPT, so, level, optname, m);
                                      return (error);
                              }
                              error = ENOPROTOOPT;
                              break;
      
      #ifdef SOCKET_SPLICE
                      case SO_SPLICE:
                              if (m == NULL) {
				error = sosplice(so, -1, 0, NULL);
			} else if (m->m_len < sizeof(int)) {
				return (EINVAL);
			} else if (m->m_len < sizeof(struct splice)) {
				error = sosplice(so, *mtod(m, int *), 0, NULL);
			} else {
				error = sosplice(so,
				    mtod(m, struct splice *)->sp_fd,
				    mtod(m, struct splice *)->sp_max,
				    &mtod(m, struct splice *)->sp_idle);
                              }
                              break;
      #endif /* SOCKET_SPLICE */
      
                      default:
                              error = ENOPROTOOPT;
                              break;
                      }
		if (error == 0 && so->so_proto->pr_ctloutput) {
			(*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so,
                                  level, optname, m);
                      }
              }
      
              return (error);
      }
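
/*
 * Example: a minimal userland sketch of the SO_RCVTIMEO handling above.
 * The timeval is converted to ticks with tvtohz() and rejected with
 * EDOM once it exceeds USHRT_MAX ticks, so very large timeouts fail.
 */
#if 0	/* illustrative userland sketch; not compiled into the kernel */
#include <sys/socket.h>
#include <sys/time.h>

int
set_recv_timeout(int s, time_t sec)
{
	struct timeval tv = { .tv_sec = sec, .tv_usec = 0 };

	return setsockopt(s, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
}
#endif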
      
      int
      sogetopt(struct socket *so, int level, int optname, struct mbuf *m)
{
              int error = 0;
      
              soassertlocked(so);
      
              if (level != SOL_SOCKET) {
		if (so->so_proto->pr_ctloutput) {
			m->m_len = 0;
      
                              error = (*so->so_proto->pr_ctloutput)(PRCO_GETOPT, so,
                                  level, optname, m);
                              if (error)
                                      return (error);
                              return (0);
                      } else
                              return (ENOPROTOOPT);
              } else {
                      m->m_len = sizeof (int);
      
		switch (optname) {
      
                      case SO_LINGER:
			m->m_len = sizeof (struct linger);
                              mtod(m, struct linger *)->l_onoff =
                                      so->so_options & SO_LINGER;
                              mtod(m, struct linger *)->l_linger = so->so_linger;
                              break;
      
                      case SO_BINDANY:
                      case SO_USELOOPBACK:
                      case SO_DEBUG:
                      case SO_KEEPALIVE:
                      case SO_REUSEADDR:
                      case SO_REUSEPORT:
                      case SO_BROADCAST:
                      case SO_OOBINLINE:
                      case SO_TIMESTAMP:
                      case SO_ZEROIZE:
                              *mtod(m, int *) = so->so_options & optname;
                              break;
      
                      case SO_DONTROUTE:
			*mtod(m, int *) = 0;
                              break;
      
                      case SO_TYPE:
			*mtod(m, int *) = so->so_type;
                              break;
      
                      case SO_ERROR:
			*mtod(m, int *) = so->so_error;
                              so->so_error = 0;
                              break;
      
                      case SO_DOMAIN:
			*mtod(m, int *) = so->so_proto->pr_domain->dom_family;
                              break;
      
                      case SO_PROTOCOL:
			*mtod(m, int *) = so->so_proto->pr_protocol;
                              break;
      
                      case SO_SNDBUF:
			*mtod(m, int *) = so->so_snd.sb_hiwat;
                              break;
      
                      case SO_RCVBUF:
			*mtod(m, int *) = so->so_rcv.sb_hiwat;
                              break;
      
                      case SO_SNDLOWAT:
			*mtod(m, int *) = so->so_snd.sb_lowat;
                              break;
      
                      case SO_RCVLOWAT:
			*mtod(m, int *) = so->so_rcv.sb_lowat;
                              break;
      
                      case SO_SNDTIMEO:
                      case SO_RCVTIMEO:
                          {
                              struct timeval tv;
                              int val = (optname == SO_SNDTIMEO ?
			    so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
      
                              m->m_len = sizeof(struct timeval);
                              memset(&tv, 0, sizeof(tv));
                              tv.tv_sec = val / hz;
                              tv.tv_usec = (val % hz) * tick;
                              memcpy(mtod(m, struct timeval *), &tv, sizeof tv);
                              break;
                          }
      
                      case SO_RTABLE:
			if (so->so_proto->pr_domain &&
                                  so->so_proto->pr_domain->dom_protosw &&
                                  so->so_proto->pr_ctloutput) {
                                      struct domain *dom = so->so_proto->pr_domain;
      
                                      level = dom->dom_protosw->pr_protocol;
                                      error = (*so->so_proto->pr_ctloutput)
                                          (PRCO_GETOPT, so, level, optname, m);
                                      if (error)
                                              return (error);
                                      break;
                              }
                              return (ENOPROTOOPT);
      
      #ifdef SOCKET_SPLICE
                      case SO_SPLICE:
                          {
                              off_t len;
      
                              m->m_len = sizeof(off_t);
			len = so->so_sp ? so->so_sp->ssp_len : 0;
                              memcpy(mtod(m, off_t *), &len, sizeof(off_t));
                              break;
                          }
      #endif /* SOCKET_SPLICE */
      
                      case SO_PEERCRED:
			if (so->so_proto->pr_protocol == AF_UNIX) {
                                      struct unpcb *unp = sotounpcb(so);
      
				if (unp->unp_flags & UNP_FEIDS) {
					m->m_len = sizeof(unp->unp_connid);
                                              memcpy(mtod(m, caddr_t),
                                                  &(unp->unp_connid), m->m_len);
                                              break;
                                      }
                                      return (ENOTCONN);
                              }
                              return (EOPNOTSUPP);
      
                      default:
                              return (ENOPROTOOPT);
                      }
                      return (0);
              }
      }
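
/*
 * Example: the SO_SPLICE case above reports ssp_len, the number of
 * bytes moved so far, as an off_t.  A minimal userland sketch:
 */
#if 0	/* illustrative userland sketch; not compiled into the kernel */
#include <sys/types.h>
#include <sys/socket.h>

off_t
spliced_bytes(int s)
{
	off_t len = 0;
	socklen_t optlen = sizeof(len);

	if (getsockopt(s, SOL_SOCKET, SO_SPLICE, &len, &optlen) == -1)
		return (-1);
	return (len);
}
#endif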
      
      void
      sohasoutofband(struct socket *so)
      {
              KERNEL_LOCK();
              pgsigio(&so->so_sigio, SIGURG, 0);
              selwakeup(&so->so_rcv.sb_sel);
              KERNEL_UNLOCK();
      }
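
/*
 * Example: a hedged userland sketch of consuming the urgent data
 * signalled above.  The process must first register itself as the
 * SIGURG recipient for the socket.
 */
#if 0	/* illustrative userland sketch; not compiled into the kernel */
#include <sys/types.h>
#include <sys/socket.h>
#include <fcntl.h>
#include <unistd.h>

ssize_t
read_urgent(int s, char *cp)
{
	/* Route SIGURG for this socket to the current process. */
	if (fcntl(s, F_SETOWN, getpid()) == -1)
		return (-1);
	/* Fetch the single urgent byte out of band. */
	return recv(s, cp, 1, MSG_OOB);
}
#endif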
      
      int
      soo_kqfilter(struct file *fp, struct knote *kn)
{
              struct socket *so = kn->kn_fp->f_data;
              struct sockbuf *sb;
      
              KERNEL_ASSERT_LOCKED();
      
	switch (kn->kn_filter) {
              case EVFILT_READ:
		if (so->so_options & SO_ACCEPTCONN)
                              kn->kn_fop = &solisten_filtops;
                      else
                              kn->kn_fop = &soread_filtops;
                      sb = &so->so_rcv;
                      break;
              case EVFILT_WRITE:
		kn->kn_fop = &sowrite_filtops;
                      sb = &so->so_snd;
                      break;
              default:
                      return (EINVAL);
              }
      
              SLIST_INSERT_HEAD(&sb->sb_sel.si_note, kn, kn_selnext);
              sb->sb_flagsintr |= SB_KNOTE;
      
              return (0);
      }
      
      void
      filt_sordetach(struct knote *kn)
{
              struct socket *so = kn->kn_fp->f_data;
      
              KERNEL_ASSERT_LOCKED();
      
	SLIST_REMOVE(&so->so_rcv.sb_sel.si_note, kn, knote, kn_selnext);
	if (SLIST_EMPTY(&so->so_rcv.sb_sel.si_note))
		so->so_rcv.sb_flagsintr &= ~SB_KNOTE;
      }
      
      int
      filt_soread(struct knote *kn, long hint)
{
              struct socket *so = kn->kn_fp->f_data;
              int rv;
      
              kn->kn_data = so->so_rcv.sb_cc;
      #ifdef SOCKET_SPLICE
	if (isspliced(so)) {
                      rv = 0;
              } else
      #endif /* SOCKET_SPLICE */
              if (so->so_state & SS_CANTRCVMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		rv = 1;
	} else if (so->so_error) {	/* temporary udp error */
		rv = 1;
	} else if (kn->kn_sfflags & NOTE_LOWAT) {
		rv = (kn->kn_data >= kn->kn_sdata);
	} else {
		rv = (kn->kn_data >= so->so_rcv.sb_lowat);
              }
      
              return rv;
      }
      
      void
      filt_sowdetach(struct knote *kn)
{
              struct socket *so = kn->kn_fp->f_data;
      
              KERNEL_ASSERT_LOCKED();
      
	SLIST_REMOVE(&so->so_snd.sb_sel.si_note, kn, knote, kn_selnext);
	if (SLIST_EMPTY(&so->so_snd.sb_sel.si_note))
		so->so_snd.sb_flagsintr &= ~SB_KNOTE;
      }
      
      int
      filt_sowrite(struct knote *kn, long hint)
{
              struct socket *so = kn->kn_fp->f_data;
              int rv;
      
              kn->kn_data = sbspace(so, &so->so_snd);
              if (so->so_state & SS_CANTSENDMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		rv = 1;
	} else if (so->so_error) {	/* temporary udp error */
		rv = 1;
	} else if (((so->so_state & SS_ISCONNECTED) == 0) &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
		rv = 0;
	} else if (kn->kn_sfflags & NOTE_LOWAT) {
		rv = (kn->kn_data >= kn->kn_sdata);
	} else {
		rv = (kn->kn_data >= so->so_snd.sb_lowat);
              }
      
              return (rv);
      }
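
/*
 * Example: the NOTE_LOWAT branches above let a kevent(2) consumer
 * override the socket's low water mark per filter.  A minimal sketch
 * that only fires once at least 512 bytes are readable:
 */
#if 0	/* illustrative userland sketch; not compiled into the kernel */
#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>

int
wait_for_512(int kq, int s)
{
	struct kevent kev;

	EV_SET(&kev, s, EVFILT_READ, EV_ADD, NOTE_LOWAT, 512, NULL);
	if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
		return (-1);
	/* Blocks until filt_soread() reports kn_data >= 512. */
	return kevent(kq, NULL, 0, &kev, 1, NULL);
}
#endif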
      
      int
      filt_solisten(struct knote *kn, long hint)
{
              struct socket *so = kn->kn_fp->f_data;
      
              kn->kn_data = so->so_qlen;
      
              return (kn->kn_data != 0);
      }
      
      #ifdef DDB
      void
      sobuf_print(struct sockbuf *,
          int (*)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))));
      
      void
      sobuf_print(struct sockbuf *sb,
          int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
      {
              (*pr)("\tsb_cc: %lu\n", sb->sb_cc);
              (*pr)("\tsb_datacc: %lu\n", sb->sb_datacc);
              (*pr)("\tsb_hiwat: %lu\n", sb->sb_hiwat);
              (*pr)("\tsb_wat: %lu\n", sb->sb_wat);
              (*pr)("\tsb_mbcnt: %lu\n", sb->sb_mbcnt);
              (*pr)("\tsb_mbmax: %lu\n", sb->sb_mbmax);
              (*pr)("\tsb_lowat: %ld\n", sb->sb_lowat);
              (*pr)("\tsb_mb: %p\n", sb->sb_mb);
              (*pr)("\tsb_mbtail: %p\n", sb->sb_mbtail);
              (*pr)("\tsb_lastrecord: %p\n", sb->sb_lastrecord);
              (*pr)("\tsb_sel: ...\n");
              (*pr)("\tsb_flagsintr: %d\n", sb->sb_flagsintr);
              (*pr)("\tsb_flags: %i\n", sb->sb_flags);
              (*pr)("\tsb_timeo: %i\n", sb->sb_timeo);
      }
      
      void
      so_print(void *v,
          int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
      {
              struct socket *so = v;
      
              (*pr)("socket %p\n", so);
              (*pr)("so_type: %i\n", so->so_type);
              (*pr)("so_options: 0x%04x\n", so->so_options); /* %b */
              (*pr)("so_linger: %i\n", so->so_linger);
              (*pr)("so_state: 0x%04x\n", so->so_state);
              (*pr)("so_pcb: %p\n", so->so_pcb);
              (*pr)("so_proto: %p\n", so->so_proto);
              (*pr)("so_sigio: %p\n", so->so_sigio.sir_sigio);
      
              (*pr)("so_head: %p\n", so->so_head);
              (*pr)("so_onq: %p\n", so->so_onq);
              (*pr)("so_q0: @%p first: %p\n", &so->so_q0, TAILQ_FIRST(&so->so_q0));
              (*pr)("so_q: @%p first: %p\n", &so->so_q, TAILQ_FIRST(&so->so_q));
              (*pr)("so_eq: next: %p\n", TAILQ_NEXT(so, so_qe));
              (*pr)("so_q0len: %i\n", so->so_q0len);
              (*pr)("so_qlen: %i\n", so->so_qlen);
              (*pr)("so_qlimit: %i\n", so->so_qlimit);
              (*pr)("so_timeo: %i\n", so->so_timeo);
              (*pr)("so_obmark: %lu\n", so->so_oobmark);
      
              (*pr)("so_sp: %p\n", so->so_sp);
              if (so->so_sp != NULL) {
                      (*pr)("\tssp_socket: %p\n", so->so_sp->ssp_socket);
                      (*pr)("\tssp_soback: %p\n", so->so_sp->ssp_soback);
                      (*pr)("\tssp_len: %lld\n",
                          (unsigned long long)so->so_sp->ssp_len);
                      (*pr)("\tssp_max: %lld\n",
                          (unsigned long long)so->so_sp->ssp_max);
                      (*pr)("\tssp_idletv: %lld %ld\n", so->so_sp->ssp_idletv.tv_sec,
                          so->so_sp->ssp_idletv.tv_usec);
                      (*pr)("\tssp_idleto: %spending (@%i)\n",
                          timeout_pending(&so->so_sp->ssp_idleto) ? "" : "not ",
                          so->so_sp->ssp_idleto.to_time);
              }
      
              (*pr)("so_rcv:\n");
              sobuf_print(&so->so_rcv, pr);
              (*pr)("so_snd:\n");
              sobuf_print(&so->so_snd, pr);
      
              (*pr)("so_upcall: %p so_upcallarg: %p\n",
                  so->so_upcall, so->so_upcallarg);
      
              (*pr)("so_euid: %d so_ruid: %d\n", so->so_euid, so->so_ruid);
              (*pr)("so_egid: %d so_rgid: %d\n", so->so_egid, so->so_rgid);
              (*pr)("so_cpid: %d\n", so->so_cpid);
      }
      #endif
      /*        $OpenBSD: endian.h,v 1.7 2018/10/02 21:30:44 naddy Exp $        */
      
      /*-
       * Copyright (c) 1997 Niklas Hallqvist.  All rights reserved.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       *
       * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
       * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
       * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
       * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
       * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
       * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
       * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
       * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
       * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
       * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
       */
      
      #ifndef _MACHINE_ENDIAN_H_
      #define _MACHINE_ENDIAN_H_
      
      #ifndef __FROM_SYS__ENDIAN
      #include <sys/_types.h>
      #endif
      
      static __inline __uint16_t
__swap16md(__uint16_t _x)
      {
              __asm ("rorw $8, %w0" : "+r" (_x));
              return (_x);
      }
      
      static __inline __uint32_t
      __swap32md(__uint32_t _x)
      {
              __asm ("bswap %0" : "+r" (_x));
              return (_x);
      }
      
      static __inline __uint64_t
      __swap64md(__uint64_t _x)
      {
              __asm ("bswapq %0" : "+r" (_x));
              return (_x);
      }
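
/*
 * A portable sketch equivalent to the inline assembly above, for
 * illustration only; sys/endian.h supplies similar generic versions
 * when no machine-dependent variant is available.  The function name
 * here is hypothetical.
 */
#if 0	/* illustrative sketch; not part of this header */
static __inline __uint32_t
__swap32_portable(__uint32_t _x)
{
	return ((_x & 0xffU) << 24) | ((_x & 0xff00U) << 8) |
	    ((_x & 0xff0000U) >> 8) | ((_x & 0xff000000U) >> 24);
}
#endif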
      
      /* Tell sys/endian.h we have MD variants of the swap macros.  */
      #define __HAVE_MD_SWAP
      
      #define _BYTE_ORDER _LITTLE_ENDIAN
      
      #ifndef __FROM_SYS__ENDIAN
      #include <sys/endian.h>
      #endif
      
      #endif /* _MACHINE_ENDIAN_H_ */
      /*        $OpenBSD: ppp_tty.c,v 1.52 2019/07/19 00:17:16 cheloha Exp $        */
      /*        $NetBSD: ppp_tty.c,v 1.12 1997/03/24 21:23:10 christos Exp $        */
      
      /*
       * ppp_tty.c - Point-to-Point Protocol (PPP) driver for asynchronous
       *               tty devices.
       *
       * Copyright (c) 1984-2000 Carnegie Mellon University. All rights reserved.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       *
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       *
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in
       *    the documentation and/or other materials provided with the
       *    distribution.
       *
       * 3. The name "Carnegie Mellon University" must not be used to
       *    endorse or promote products derived from this software without
       *    prior written permission. For permission or any legal
       *    details, please contact
       *      Office of Technology Transfer
       *      Carnegie Mellon University
       *      5000 Forbes Avenue
       *      Pittsburgh, PA  15213-3890
       *      (412) 268-4387, fax: (412) 268-7395
       *      tech-transfer@andrew.cmu.edu
       *
       * 4. Redistributions of any form whatsoever must retain the following
       *    acknowledgment:
       *    "This product includes software developed by Computing Services
       *     at Carnegie Mellon University (http://www.cmu.edu/computing/)."
       *
       * CARNEGIE MELLON UNIVERSITY DISCLAIMS ALL WARRANTIES WITH REGARD TO
       * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
       * AND FITNESS, IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
       * FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
       * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
       * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
       * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
       *
       * Based on:
       *        @(#)if_sl.c        7.6.1.2 (Berkeley) 2/15/89
       *
       * Copyright (c) 1987 Regents of the University of California.
       * All rights reserved.
       *
       * Redistribution and use in source and binary forms are permitted
       * provided that the above copyright notice and this paragraph are
       * duplicated in all such forms and that any documentation,
       * advertising materials, and other materials related to such
       * distribution and use acknowledge that the software was developed
       * by the University of California, Berkeley.  The name of the
       * University may not be used to endorse or promote products derived
       * from this software without specific prior written permission.
       * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
       * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
       *
       * Serial Line interface
       *
       * Rick Adams
       * Center for Seismic Studies
       * 1300 N 17th Street, Suite 1450
       * Arlington, Virginia 22209
       * (703)276-7900
       * rick@seismo.ARPA
       * seismo!rick
       *
       * Pounded on heavily by Chris Torek (chris@mimsy.umd.edu, umcp-cs!chris).
       * Converted to 4.3BSD Beta by Chris Torek.
       * Other changes made at Berkeley, based in part on code by Kirk Smith.
       *
       * Converted to 4.3BSD+ 386BSD by Brad Parker (brad@cayman.com)
       * Added VJ tcp header compression; more unified ioctls
       *
       * Extensively modified by Paul Mackerras (paulus@cs.anu.edu.au).
       * Cleaned up a lot of the mbuf-related code to fix bugs that
       * caused system crashes and packet corruption.  Changed pppstart
       * so that it doesn't just give up with a collision if the whole
       * packet doesn't fit in the output ring buffer.
       *
       * Added priority queueing for interactive IP packets, following
       * the model of if_sl.c, plus hooks for bpf.
       * Paul Mackerras (paulus@cs.anu.edu.au).
       */
      
      /* from if_sl.c,v 1.11 84/10/04 12:54:47 rick Exp */
      /* from NetBSD: if_ppp.c,v 1.15.2.2 1994/07/28 05:17:58 cgd Exp */
      
      #include "ppp.h"
      #if NPPP > 0
      
      #define VJC
      #define PPP_COMPRESS
      
      #include <sys/param.h>
      #include <sys/proc.h>
      #include <sys/mbuf.h>
      #include <sys/socket.h>
      #include <sys/timeout.h>
      #include <sys/ioctl.h>
      #include <sys/fcntl.h>
      #include <sys/tty.h>
      #include <sys/kernel.h>
      #include <sys/conf.h>
      #include <sys/vnode.h>
      #include <sys/systm.h>
      #include <sys/rwlock.h>
      #include <sys/pool.h>
      
      #include <net/if.h>
      #include <net/if_var.h>
      
      #ifdef VJC
      #include <netinet/in.h>
      #include <netinet/ip.h>
      #include <net/slcompress.h>
      #endif
      
      #include <net/bpf.h>
      #include <net/ppp_defs.h>
      #include <net/if_ppp.h>
      #include <net/if_pppvar.h>
      
      int        pppstart_internal(struct tty *tp, int);
      
      u_int16_t pppfcs(u_int16_t fcs, u_char *cp, int len);
      void        pppasyncstart(struct ppp_softc *);
      void        pppasyncctlp(struct ppp_softc *);
      void        pppasyncrelinq(struct ppp_softc *);
      void        ppp_timeout(void *);
      void        ppppkt(struct ppp_softc *sc);
      void        pppdumpb(u_char *b, int l);
      void        ppplogchar(struct ppp_softc *, int);
      
      struct rwlock ppp_pkt_init = RWLOCK_INITIALIZER("ppppktini");
      struct pool ppp_pkts;
      
      #define PKT_MAXLEN(_sc) ((_sc)->sc_mru + PPP_HDRLEN + PPP_FCSLEN)
      
      /*
       * Does c need to be escaped?
       */
      #define ESCAPE_P(c)        (sc->sc_asyncmap[(c) >> 5] & (1 << ((c) & 0x1F)))
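
/*
 * Worked example (editor's note): with the transmit asyncmap installed
 * by pppopen() (sc_asyncmap[0] == 0xffffffff and sc_asyncmap[3] has
 * 0x60000000 set), ESCAPE_P(0x03) tests bit 3 of word 0 and is true:
 * every control character 0x00-0x1f is escaped.  ESCAPE_P(PPP_ESCAPE)
 * (0x7d) tests bit 29 of word 3 and is also true, while a printable
 * byte such as 0x41 ('A') indexes word 2, which is normally zero, and
 * passes through unescaped.
 */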
      
      /*
       * Procedures for using an async tty interface for PPP.
       */
      
      /* This is a NetBSD-1.0 or later kernel. */
      #define CCOUNT(q)        ((q)->c_cc)
      
      /*
       * Line specific open routine for async tty devices.
       * Attach the given tty to the first available ppp unit.
       * Called from device open routine or ttioctl.
       */
      int
      pppopen(dev_t dev, struct tty *tp, struct proc *p)
{
          struct ppp_softc *sc;
          int error, s;
      
    if ((error = suser(p)) != 0)
              return (error);
      
          rw_enter_write(&ppp_pkt_init);
          if (ppp_pkts.pr_size == 0) {
              extern struct kmem_pa_mode kp_dma_contig;
      
              pool_init(&ppp_pkts, sizeof(struct ppp_pkt), 0,
                IPL_TTY, 0, "ppppkts", NULL); /* IPL_SOFTTTY */
              pool_set_constraints(&ppp_pkts, &kp_dma_contig);
          }
          rw_exit_write(&ppp_pkt_init);
      
          s = spltty();
      
          if (tp->t_line == PPPDISC) {
              sc = (struct ppp_softc *) tp->t_sc;
              if (sc != NULL && sc->sc_devp == (void *) tp) {
                  splx(s);
                  return (0);
              }
          }
      
          if ((sc = pppalloc(p->p_p->ps_pid)) == NULL) {
              splx(s);
              return ENXIO;
          }
      
          if (sc->sc_relinq)
              (*sc->sc_relinq)(sc);        /* get previous owner to relinquish the unit */
      
          timeout_set(&sc->sc_timo, ppp_timeout, sc);
          sc->sc_ilen = 0;
          sc->sc_pkt = NULL;
          bzero(sc->sc_asyncmap, sizeof(sc->sc_asyncmap));
          sc->sc_asyncmap[0] = 0xffffffff;
          sc->sc_asyncmap[3] = 0x60000000;
          sc->sc_rasyncmap = 0;
          sc->sc_devp = (void *) tp;
          sc->sc_start = pppasyncstart;
          sc->sc_ctlp = pppasyncctlp;
          sc->sc_relinq = pppasyncrelinq;
          sc->sc_outm = NULL;
          ppppkt(sc);
          sc->sc_if.if_flags |= IFF_RUNNING;
          sc->sc_if.if_baudrate = tp->t_ospeed;
      
          tp->t_sc = (caddr_t) sc;
          ttyflush(tp, FREAD | FWRITE);
      
          splx(s);
          return (0);
      }
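
/*
 * Editor's illustration (userland, not kernel code): pppopen() runs
 * when a process such as pppd attaches the PPP line discipline to a
 * tty it has opened:
 *
 *	int disc = PPPDISC;
 *	if (ioctl(ttyfd, TIOCSETD, &disc) == -1)
 *		err(1, "TIOCSETD");
 *
 * Only the superuser may do this, matching the suser() check above.
 */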
      
      /*
       * Line specific close routine, called from device close routine
       * and from ttioctl.
       * Detach the tty from the ppp unit.
       * Mimics part of ttyclose().
       */
      int
      pppclose(struct tty *tp, int flag, struct proc *p)
      {
          struct ppp_softc *sc;
          int s;
      
          s = spltty();
          ttyflush(tp, FREAD|FWRITE);
          tp->t_line = 0;
          sc = (struct ppp_softc *) tp->t_sc;
          if (sc != NULL) {
              tp->t_sc = NULL;
              if (tp == (struct tty *) sc->sc_devp) {
                  pppasyncrelinq(sc);
                  pppdealloc(sc);
              }
          }
          splx(s);
          return 0;
      }
      
      /*
       * Relinquish the interface unit to another device.
       */
      void
      pppasyncrelinq(struct ppp_softc *sc)
      {
          int s;
      
          KERNEL_LOCK();
          s = spltty();
          m_freem(sc->sc_outm);
          sc->sc_outm = NULL;
      
          if (sc->sc_pkt != NULL) {
              ppp_pkt_free(sc->sc_pkt);
              sc->sc_pkt = sc->sc_pktc = NULL;
          }
          if (sc->sc_flags & SC_TIMEOUT) {
              timeout_del(&sc->sc_timo);
              sc->sc_flags &= ~SC_TIMEOUT;
          }
          splx(s);
          KERNEL_UNLOCK();
      }
      
      /*
       * Line specific (tty) read routine.
       */
      int
      pppread(struct tty *tp, struct uio *uio, int flag)
      {
          struct ppp_softc *sc = (struct ppp_softc *)tp->t_sc;
          struct mbuf *m, *m0;
          int s;
          int error = 0;
      
          if (sc == NULL)
              return 0;
          /*
     * Loop waiting for input, checking that nothing disastrous
           * happens in the meantime.
           */
          s = spltty();
          for (;;) {
              if (tp != (struct tty *) sc->sc_devp || tp->t_line != PPPDISC) {
                  splx(s);
                  return 0;
              }
              /* Get the packet from the input queue */
              m0 = mq_dequeue(&sc->sc_inq);
              if (m0 != NULL)
                  break;
              if ((tp->t_state & TS_CARR_ON) == 0 && (tp->t_cflag & CLOCAL) == 0
                  && (tp->t_state & TS_ISOPEN)) {
                  splx(s);
                  return 0;                /* end of file */
              }
              if (tp->t_state & TS_ASYNC || flag & IO_NDELAY) {
                  splx(s);
                  return (EWOULDBLOCK);
              }
              error = ttysleep(tp, (caddr_t)&tp->t_rawq, TTIPRI|PCATCH, ttyin);
              if (error) {
                  splx(s);
                  return error;
              }
          }
      
          /* Pull place-holder byte out of canonical queue */
          getc(&tp->t_canq);
          splx(s);
      
          for (m = m0; m && uio->uio_resid; m = m->m_next)
              if ((error = uiomove(mtod(m, u_char *), m->m_len, uio)) != 0)
                  break;
          m_freem(m0);
          return (error);
      }
      
      /*
       * Line specific (tty) write routine.
       */
      int
      pppwrite(struct tty *tp, struct uio *uio, int flag)
      {
          struct ppp_softc *sc = (struct ppp_softc *)tp->t_sc;
          struct mbuf *m, *m0, **mp;
          struct sockaddr dst;
          u_int len;
          int error;
      
          if ((tp->t_state & TS_CARR_ON) == 0 && (tp->t_cflag & CLOCAL) == 0)
              return 0;                /* wrote 0 bytes */
          if (tp->t_line != PPPDISC)
              return (EINVAL);
          if (sc == NULL || tp != (struct tty *) sc->sc_devp)
              return EIO;
          if (uio->uio_resid > sc->sc_if.if_mtu + PPP_HDRLEN ||
              uio->uio_resid < PPP_HDRLEN)
              return (EMSGSIZE);
          for (mp = &m0; uio->uio_resid; mp = &m->m_next) {
              if (mp == &m0) {
                  MGETHDR(m, M_WAIT, MT_DATA);
                  m->m_pkthdr.len = uio->uio_resid - PPP_HDRLEN;
                  m->m_pkthdr.ph_ifidx = 0;
              } else
                  MGET(m, M_WAIT, MT_DATA);
              *mp = m;
              m->m_len = 0;
              if (uio->uio_resid >= MCLBYTES / 2)
                  MCLGET(m, M_DONTWAIT);
              len = m_trailingspace(m);
              if (len > uio->uio_resid)
                  len = uio->uio_resid;
              if ((error = uiomove(mtod(m, u_char *), len, uio)) != 0) {
                  m_freem(m0);
                  return (error);
              }
              m->m_len = len;
          }
          dst.sa_family = AF_UNSPEC;
          bcopy(mtod(m0, u_char *), dst.sa_data, PPP_HDRLEN);
          m0->m_data += PPP_HDRLEN;
          m0->m_len -= PPP_HDRLEN;
          return sc->sc_if.if_output(&sc->sc_if, m0, &dst, NULL);
      }
      
      /*
       * Line specific (tty) ioctl routine.
       * This discipline requires that tty device drivers call
       * the line specific l_ioctl routine from their ioctl routines.
       */
      int
      ppptioctl(struct tty *tp, u_long cmd, caddr_t data, int flag, struct proc *p)
      {
          struct ppp_softc *sc = (struct ppp_softc *) tp->t_sc;
          int error, s;
      
          if (sc == NULL || tp != (struct tty *) sc->sc_devp)
              return -1;
      
          error = 0;
          switch (cmd) {
          case PPPIOCSASYNCMAP:
              if ((error = suser(p)) != 0)
                  break;
              sc->sc_asyncmap[0] = *(u_int *)data;
              break;
      
          case PPPIOCGASYNCMAP:
              *(u_int *)data = sc->sc_asyncmap[0];
              break;
      
          case PPPIOCSRASYNCMAP:
              if ((error = suser(p)) != 0)
                  break;
              sc->sc_rasyncmap = *(u_int *)data;
              break;
      
          case PPPIOCGRASYNCMAP:
              *(u_int *)data = sc->sc_rasyncmap;
              break;
      
          case PPPIOCSXASYNCMAP:
              if ((error = suser(p)) != 0)
                  break;
              s = spltty();
              bcopy(data, sc->sc_asyncmap, sizeof(sc->sc_asyncmap));
              sc->sc_asyncmap[1] = 0;                    /* mustn't escape 0x20 - 0x3f */
              sc->sc_asyncmap[2] &= ~0x40000000;  /* mustn't escape 0x5e */
              sc->sc_asyncmap[3] |= 0x60000000;   /* must escape 0x7d, 0x7e */
              splx(s);
              break;
      
          case PPPIOCGXASYNCMAP:
              bcopy(sc->sc_asyncmap, data, sizeof(sc->sc_asyncmap));
              break;
      
          default:
              NET_LOCK();
              error = pppioctl(sc, cmd, data, flag, p);
              NET_UNLOCK();
              if (error == 0 && cmd == PPPIOCSMRU)
                  ppppkt(sc);
          }
      
          return error;
      }
      
      /*
       * FCS lookup table as calculated by genfcstab.
       */
      static u_int16_t fcstab[256] = {
              0x0000,        0x1189,        0x2312,        0x329b,        0x4624,        0x57ad,        0x6536,        0x74bf,
              0x8c48,        0x9dc1,        0xaf5a,        0xbed3,        0xca6c,        0xdbe5,        0xe97e,        0xf8f7,
              0x1081,        0x0108,        0x3393,        0x221a,        0x56a5,        0x472c,        0x75b7,        0x643e,
              0x9cc9,        0x8d40,        0xbfdb,        0xae52,        0xdaed,        0xcb64,        0xf9ff,        0xe876,
              0x2102,        0x308b,        0x0210,        0x1399,        0x6726,        0x76af,        0x4434,        0x55bd,
              0xad4a,        0xbcc3,        0x8e58,        0x9fd1,        0xeb6e,        0xfae7,        0xc87c,        0xd9f5,
              0x3183,        0x200a,        0x1291,        0x0318,        0x77a7,        0x662e,        0x54b5,        0x453c,
              0xbdcb,        0xac42,        0x9ed9,        0x8f50,        0xfbef,        0xea66,        0xd8fd,        0xc974,
              0x4204,        0x538d,        0x6116,        0x709f,        0x0420,        0x15a9,        0x2732,        0x36bb,
              0xce4c,        0xdfc5,        0xed5e,        0xfcd7,        0x8868,        0x99e1,        0xab7a,        0xbaf3,
              0x5285,        0x430c,        0x7197,        0x601e,        0x14a1,        0x0528,        0x37b3,        0x263a,
              0xdecd,        0xcf44,        0xfddf,        0xec56,        0x98e9,        0x8960,        0xbbfb,        0xaa72,
              0x6306,        0x728f,        0x4014,        0x519d,        0x2522,        0x34ab,        0x0630,        0x17b9,
              0xef4e,        0xfec7,        0xcc5c,        0xddd5,        0xa96a,        0xb8e3,        0x8a78,        0x9bf1,
              0x7387,        0x620e,        0x5095,        0x411c,        0x35a3,        0x242a,        0x16b1,        0x0738,
              0xffcf,        0xee46,        0xdcdd,        0xcd54,        0xb9eb,        0xa862,        0x9af9,        0x8b70,
              0x8408,        0x9581,        0xa71a,        0xb693,        0xc22c,        0xd3a5,        0xe13e,        0xf0b7,
              0x0840,        0x19c9,        0x2b52,        0x3adb,        0x4e64,        0x5fed,        0x6d76,        0x7cff,
              0x9489,        0x8500,        0xb79b,        0xa612,        0xd2ad,        0xc324,        0xf1bf,        0xe036,
              0x18c1,        0x0948,        0x3bd3,        0x2a5a,        0x5ee5,        0x4f6c,        0x7df7,        0x6c7e,
              0xa50a,        0xb483,        0x8618,        0x9791,        0xe32e,        0xf2a7,        0xc03c,        0xd1b5,
              0x2942,        0x38cb,        0x0a50,        0x1bd9,        0x6f66,        0x7eef,        0x4c74,        0x5dfd,
              0xb58b,        0xa402,        0x9699,        0x8710,        0xf3af,        0xe226,        0xd0bd,        0xc134,
              0x39c3,        0x284a,        0x1ad1,        0x0b58,        0x7fe7,        0x6e6e,        0x5cf5,        0x4d7c,
              0xc60c,        0xd785,        0xe51e,        0xf497,        0x8028,        0x91a1,        0xa33a,        0xb2b3,
              0x4a44,        0x5bcd,        0x6956,        0x78df,        0x0c60,        0x1de9,        0x2f72,        0x3efb,
              0xd68d,        0xc704,        0xf59f,        0xe416,        0x90a9,        0x8120,        0xb3bb,        0xa232,
              0x5ac5,        0x4b4c,        0x79d7,        0x685e,        0x1ce1,        0x0d68,        0x3ff3,        0x2e7a,
              0xe70e,        0xf687,        0xc41c,        0xd595,        0xa12a,        0xb0a3,        0x8238,        0x93b1,
              0x6b46,        0x7acf,        0x4854,        0x59dd,        0x2d62,        0x3ceb,        0x0e70,        0x1ff9,
              0xf78f,        0xe606,        0xd49d,        0xc514,        0xb1ab,        0xa022,        0x92b9,        0x8330,
              0x7bc7,        0x6a4e,        0x58d5,        0x495c,        0x3de3,        0x2c6a,        0x1ef1,        0x0f78
      };
      
      /*
       * Calculate a new FCS given the current FCS and the new data.
       */
      u_int16_t
      pppfcs(u_int16_t fcs, u_char *cp, int len)
      {
          while (len--)
              fcs = PPP_FCS(fcs, *cp++);
          return (fcs);
      }
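
/*
 * Usage sketch (editor's note): the FCS is accumulated byte by byte
 * over the frame.  A receiver that also feeds the two received FCS
 * octets through PPP_FCS ends up with the constant PPP_GOODFCS, which
 * is how pppinput() validates a frame:
 *
 *	u_int16_t fcs = pppfcs(PPP_INITFCS, frame, framelen);
 *	if (fcs == PPP_GOODFCS)
 *		... frame is intact ...
 *
 * A sender computes the FCS over the payload only and appends its
 * ones complement, low byte first (see pppasyncstart()).
 */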
      
      /*
       * This gets called from pppoutput when a new packet is
       * put on a queue.
       */
      void
      pppasyncstart(struct ppp_softc *sc)
      {
          struct tty *tp = (struct tty *) sc->sc_devp;
          struct mbuf *m;
          int len;
          u_char *start, *stop, *cp;
          int n, ndone, done, idle;
          struct mbuf *m2;
          int s;
      
          KERNEL_LOCK();
          idle = 0;
          while (CCOUNT(&tp->t_outq) < tp->t_hiwat) {
              /*
               * See if we have an existing packet partly sent.
               * If not, get a new packet and start sending it.
               */
              m = sc->sc_outm;
              if (m == NULL) {
                  /*
                   * Get another packet to be sent.
                   */
                  m = ppp_dequeue(sc);
                  if (m == NULL) {
                      idle = 1;
                      break;
                  }
      
                  /*
                   * The extra PPP_FLAG will start up a new packet, and thus
                   * will flush any accumulated garbage.  We do this whenever
                   * the line may have been idle for some time.
                   */
                  if (CCOUNT(&tp->t_outq) == 0) {
                      ++sc->sc_stats.ppp_obytes;
                      (void) putc(PPP_FLAG, &tp->t_outq);
                  }
      
                  /* Calculate the FCS for the first mbuf's worth. */
                  sc->sc_outfcs = pppfcs(PPP_INITFCS, mtod(m, u_char *), m->m_len);
              }
      
              for (;;) {
                  start = mtod(m, u_char *);
                  len = m->m_len;
                  stop = start + len;
                  while (len > 0) {
                      /*
                       * Find out how many bytes in the string we can
                       * handle without doing something special.
                       */
                      for (cp = start; cp < stop; cp++)
                          if (ESCAPE_P(*cp))
                              break;
                      n = cp - start;
                      if (n) {
                          /* NetBSD (0.9 or later), 4.3-Reno or similar. */
                          ndone = n - b_to_q(start, n, &tp->t_outq);
                          len -= ndone;
                          start += ndone;
                          sc->sc_stats.ppp_obytes += ndone;
      
                          if (ndone < n)
                              break;        /* packet doesn't fit */
                      }
                      /*
                       * If there are characters left in the mbuf,
                       * the first one must be special.
                       * Put it out in a different form.
                       */
                      if (len) {
                          s = spltty();
                          if (putc(PPP_ESCAPE, &tp->t_outq)) {
                              splx(s);
                              break;
                          }
                          if (putc(*start ^ PPP_TRANS, &tp->t_outq)) {
                              (void) unputc(&tp->t_outq);
                              splx(s);
                              break;
                          }
                          splx(s);
                          sc->sc_stats.ppp_obytes += 2;
                          start++;
                          len--;
                      }
                  }
      
                  /*
                   * If we didn't empty this mbuf, remember where we're up to.
                   * If we emptied the last mbuf, try to add the FCS and closing
                   * flag, and if we can't, leave sc_outm pointing to m, but with
                   * m->m_len == 0, to remind us to output the FCS and flag later.
                   */
                  done = len == 0;
                  if (done && m->m_next == NULL) {
                      u_char *p, *q;
                      int c;
                      u_char endseq[8];
      
                      /*
                       * We may have to escape the bytes in the FCS.
                       */
                      p = endseq;
                      c = ~sc->sc_outfcs & 0xFF;
                      if (ESCAPE_P(c)) {
                          *p++ = PPP_ESCAPE;
                          *p++ = c ^ PPP_TRANS;
                      } else
                          *p++ = c;
                      c = (~sc->sc_outfcs >> 8) & 0xFF;
                      if (ESCAPE_P(c)) {
                          *p++ = PPP_ESCAPE;
                          *p++ = c ^ PPP_TRANS;
                      } else
                          *p++ = c;
                      *p++ = PPP_FLAG;
      
                      /*
                       * Try to output the FCS and flag.  If the bytes
                       * don't all fit, back out.
                       */
                      s = spltty();
                      for (q = endseq; q < p; ++q)
                          if (putc(*q, &tp->t_outq)) {
                              done = 0;
                              for (; q > endseq; --q)
                                  unputc(&tp->t_outq);
                              break;
                          }
                      splx(s);
                      if (done)
                          sc->sc_stats.ppp_obytes += q - endseq;
                  }
      
                  if (!done) {
                      /* remember where we got to */
                      m->m_data = start;
                      m->m_len = len;
                      break;
                  }
      
                  /* Finished with this mbuf; free it and move on. */
                  m2 = m_free(m);
                  m = m2;
                  if (m == NULL) {
                      /* Finished a packet */
                      break;
                  }
                  sc->sc_outfcs = pppfcs(sc->sc_outfcs, mtod(m, u_char *), m->m_len);
              }
      
              /*
               * If m == NULL, we have finished a packet.
               * If m != NULL, we've either done as much work this time
               * as we need to, or else we've filled up the output queue.
               */
              sc->sc_outm = m;
              if (m)
                  break;
          }
      
          /* Call pppstart to start output again if necessary. */
          s = spltty();
          pppstart_internal(tp, 0);
      
          /*
           * This timeout is needed for operation on a pseudo-tty,
           * because the pty code doesn't call pppstart after it has
           * drained the t_outq.
           */
          if (!idle && (sc->sc_flags & SC_TIMEOUT) == 0) {
              timeout_add(&sc->sc_timo, 1);
              sc->sc_flags |= SC_TIMEOUT;
          }
      
          splx(s);
          KERNEL_UNLOCK();
      }
      
      /*
       * This gets called when a received packet is placed on
       * the inq.
       */
      void
      pppasyncctlp(struct ppp_softc *sc)
      {
          struct tty *tp;
          int s;
      
          KERNEL_LOCK();
          /* Put a placeholder byte in canq for ttpoll()/ttnread(). */
          s = spltty();
          tp = (struct tty *) sc->sc_devp;
          putc(0, &tp->t_canq);
          ttwakeup(tp);
          splx(s);
          KERNEL_UNLOCK();
      }
      
      /*
       * Start output on async tty interface.  If the transmit queue
       * has drained sufficiently, arrange for pppasyncstart to be
       * called later.
       */
      int
      pppstart_internal(struct tty *tp, int force)
      {
          struct ppp_softc *sc = (struct ppp_softc *) tp->t_sc;
      
          /*
           * If there is stuff in the output queue, send it now.
           * We are being called in lieu of ttstart and must do what it would.
           */
          if (tp->t_oproc != NULL)
              (*tp->t_oproc)(tp);
      
          /*
           * If the transmit queue has drained and the tty has not hung up
           * or been disconnected from the ppp unit, then tell if_ppp.c that
           * we need more output.
           */
          if ((CCOUNT(&tp->t_outq) < tp->t_lowat || force)
              && !((tp->t_state & TS_CARR_ON) == 0 && (tp->t_cflag & CLOCAL) == 0)
              && sc != NULL && tp == (struct tty *) sc->sc_devp) {
              ppp_restart(sc);
          }
      
          return 0;
      }
      
      int
      pppstart(struct tty *tp)
      {
              return pppstart_internal(tp, 0);
      }
      
      /*
       * Timeout routine - try to start some more output.
       */
      void
      ppp_timeout(void *x)
      {
          struct ppp_softc *sc = (struct ppp_softc *) x;
          struct tty *tp = (struct tty *) sc->sc_devp;
          int s;
      
          s = spltty();
          sc->sc_flags &= ~SC_TIMEOUT;
          pppstart_internal(tp, 1);
          splx(s);
      }
      
      /*
 * Allocate enough packet buffers to handle the current MRU.
       */
      void
      ppppkt(struct ppp_softc *sc)
      {
          struct ppp_pkt **pktp, *pkt;
          int len;
          int s;
      
          s = spltty();
          pktp = &sc->sc_pkt;
          for (len = PKT_MAXLEN(sc); len > 0; len -= sizeof(pkt->p_buf)) {
              pkt = *pktp;
              if (pkt == NULL) {
                  pkt = pool_get(&ppp_pkts, PR_NOWAIT);
                  if (pkt == NULL)
                      break;
                  PKT_NEXT(pkt) = NULL;
                  PKT_PREV(pkt) = *pktp;
                  PKT_LEN(pkt) = 0;
                  *pktp = pkt;
              }
              pktp = &PKT_NEXT(pkt);
          }
          splx(s);
      }
      
      void
      ppp_pkt_free(struct ppp_pkt *pkt)
      {
              struct ppp_pkt *next;
      
              while (pkt != NULL) {
                      next = PKT_NEXT(pkt);
                      pool_put(&ppp_pkts, pkt);
                      pkt = next;
              }
      }
      
      /*
       * tty interface receiver interrupt.
       */
      static unsigned int paritytab[8] = {
          0x96696996, 0x69969669, 0x69969669, 0x96696996,
          0x69969669, 0x96696996, 0x96696996, 0x69969669
      };
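
/*
 * Editor's note: each paritytab word packs 32 parity bits, so
 * paritytab[c >> 5] & (1 << (c & 0x1F)) is nonzero exactly when the
 * byte c has odd parity.  For example, c == 0x01 (one bit set) maps
 * to bit 1 of 0x96696996, which is 1, while c == 0x03 (two bits set)
 * maps to bit 3, which is 0.
 */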
      
      int
      pppinput(int c, struct tty *tp)
      {
          struct ppp_softc *sc;
          struct ppp_pkt *pkt;
          int ilen, s;
      
          sc = (struct ppp_softc *) tp->t_sc;
          if (sc == NULL || tp != (struct tty *) sc->sc_devp)
              return 0;
      
          ++tk_nin;
          ++sc->sc_stats.ppp_ibytes;
      
          if (c & TTY_FE) {
              /* framing error or overrun on this char - abort packet */
              if (sc->sc_flags & SC_DEBUG)
                  printf("%s: bad char %x\n", sc->sc_if.if_xname, c);
              goto flush;
          }
      
          c &= 0xff;
      
          /*
           * Handle software flow control of output.
           */
          if (tp->t_iflag & IXON) {
              if (c == tp->t_cc[VSTOP] && tp->t_cc[VSTOP] != _POSIX_VDISABLE) {
                  if ((tp->t_state & TS_TTSTOP) == 0) {
                      tp->t_state |= TS_TTSTOP;
                      (*cdevsw[major(tp->t_dev)].d_stop)(tp, 0);
                  }
                  return 0;
              }
              if (c == tp->t_cc[VSTART] && tp->t_cc[VSTART] != _POSIX_VDISABLE) {
                  tp->t_state &= ~TS_TTSTOP;
                  if (tp->t_oproc != NULL)
                      (*tp->t_oproc)(tp);
                  return 0;
              }
          }
      
          s = spltty();
          if (c & 0x80)
              sc->sc_flags |= SC_RCV_B7_1;
          else
              sc->sc_flags |= SC_RCV_B7_0;
          if (paritytab[c >> 5] & (1 << (c & 0x1F)))
              sc->sc_flags |= SC_RCV_ODDP;
          else
              sc->sc_flags |= SC_RCV_EVNP;
          splx(s);
      
          if (sc->sc_flags & SC_LOG_RAWIN)
              ppplogchar(sc, c);
      
          if (c == PPP_FLAG) {
              ilen = sc->sc_ilen;
              sc->sc_ilen = 0;
      
              if (sc->sc_rawin_count > 0)
                  ppplogchar(sc, -1);
      
              /*
               * If SC_ESCAPED is set, then we've seen the packet
               * abort sequence "}~".
               */
              if (sc->sc_flags & (SC_FLUSH | SC_ESCAPED)
                  || (ilen > 0 && sc->sc_fcs != PPP_GOODFCS)) {
                  s = spltty();
                  sc->sc_flags |= SC_PKTLOST;        /* note the dropped packet */
                  if ((sc->sc_flags & (SC_FLUSH | SC_ESCAPED)) == 0){
                      if (sc->sc_flags & SC_DEBUG)
                          printf("%s: bad fcs %x\n", sc->sc_if.if_xname,
                              sc->sc_fcs);
                      sc->sc_if.if_ierrors++;
                      sc->sc_stats.ppp_ierrors++;
                  } else
                      sc->sc_flags &= ~(SC_FLUSH | SC_ESCAPED);
                  splx(s);
                  return 0;
              }
      
              if (ilen < PPP_HDRLEN + PPP_FCSLEN) {
                  if (ilen) {
                      if (sc->sc_flags & SC_DEBUG)
                          printf("%s: too short (%d)\n", sc->sc_if.if_xname, ilen);
                      s = spltty();
                      sc->sc_if.if_ierrors++;
                      sc->sc_stats.ppp_ierrors++;
                      sc->sc_flags |= SC_PKTLOST;
                      splx(s);
                  }
                  return 0;
              }
      
              /*
               * Remove FCS trailer.
               */
              ilen -= 2;
              pkt = sc->sc_pktc;
              if (--PKT_LEN(pkt) == 0) {
                  pkt = PKT_PREV(pkt);
                  sc->sc_pktc = pkt;
              }
              PKT_LEN(pkt)--;
      
        /* excise this packet chain */
              pkt = sc->sc_pkt;
              sc->sc_pkt = sc->sc_pktc = PKT_NEXT(sc->sc_pktc);
              PKT_NEXT(pkt) = NULL;
      
              ppppktin(sc, pkt, sc->sc_flags & SC_PKTLOST);
              if (sc->sc_flags & SC_PKTLOST) {
                  s = spltty();
                  sc->sc_flags &= ~SC_PKTLOST;
                  splx(s);
              }
      
              ppppkt(sc);
              return 0;
          }
      
          if (sc->sc_flags & SC_FLUSH) {
              if (sc->sc_flags & SC_LOG_FLUSH)
                  ppplogchar(sc, c);
              return 0;
          }
      
          if (c < 0x20 && (sc->sc_rasyncmap & (1 << c)))
              return 0;
      
          s = spltty();
          if (sc->sc_flags & SC_ESCAPED) {
              sc->sc_flags &= ~SC_ESCAPED;
              c ^= PPP_TRANS;
          } else if (c == PPP_ESCAPE) {
              sc->sc_flags |= SC_ESCAPED;
              splx(s);
              return 0;
          }
          splx(s);
      
          /*
           * Initialize buffer on first octet received.
           * First octet could be address or protocol (when compressing
           * address/control).
           * Second octet is control.
           * Third octet is first or second (when compressing protocol)
           * octet of protocol.
           * Fourth octet is second octet of protocol.
           */
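    /*
     * Example (editor's note): an uncompressed LCP frame begins
     * ff 03 c0 21 (PPP_ALLSTATIONS, PPP_UI, protocol 0xc021).  With
     * address/control compression the same frame arrives as c0 21 ...,
     * and with protocol field compression an IP datagram (protocol
     * 0x0021) may arrive with the single protocol octet 21; the code
     * below reconstructs the full four-octet header in either case.
     */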
          if (sc->sc_ilen == 0) {
        /* reset the first input packet buffer */
              if (sc->sc_pkt == NULL) {
                  ppppkt(sc);
                  if (sc->sc_pkt == NULL) {
                      if (sc->sc_flags & SC_DEBUG)
                          printf("%s: no input mbufs!\n", sc->sc_if.if_xname);
                      goto flush;
                  }
              }
              pkt = sc->sc_pkt;
              PKT_LEN(pkt) = 0;
              sc->sc_pktc = pkt;
              sc->sc_pktp = pkt->p_buf;
              sc->sc_fcs = PPP_INITFCS;
              if (c != PPP_ALLSTATIONS) {
                  if (sc->sc_flags & SC_REJ_COMP_AC) {
                      if (sc->sc_flags & SC_DEBUG)
                          printf("%s: garbage received: 0x%x (need 0xFF)\n",
                              sc->sc_if.if_xname, c);
                      goto flush;
                  }
                  *sc->sc_pktp++ = PPP_ALLSTATIONS;
                  *sc->sc_pktp++ = PPP_UI;
                  sc->sc_ilen += 2;
                  PKT_LEN(pkt) += 2;
              }
          }
          if (sc->sc_ilen == 1 && c != PPP_UI) {
              if (sc->sc_flags & SC_DEBUG)
                  printf("%s: missing UI (0x3), got 0x%x\n",
                      sc->sc_if.if_xname, c);
              goto flush;
          }
          if (sc->sc_ilen == 2 && (c & 1) == 1) {
              /* a compressed protocol */
              *sc->sc_pktp++ = 0;
              sc->sc_ilen++;
              PKT_LEN(sc->sc_pktc)++;
          }
          if (sc->sc_ilen == 3 && (c & 1) == 0) {
              if (sc->sc_flags & SC_DEBUG)
                  printf("%s: bad protocol %x\n", sc->sc_if.if_xname,
                      (sc->sc_pktp[-1] << 8) + c);
              goto flush;
          }
      
          /* packet beyond configured mru? */
          if (++sc->sc_ilen > PKT_MAXLEN(sc)) {
              if (sc->sc_flags & SC_DEBUG)
                  printf("%s: packet too big\n", sc->sc_if.if_xname);
              goto flush;
          }
      
          /* is this packet full? */
          pkt = sc->sc_pktc;
          if (PKT_LEN(pkt) >= sizeof(pkt->p_buf)) {
              if (PKT_NEXT(pkt) == NULL) {
                  ppppkt(sc);
                  if (PKT_NEXT(pkt) == NULL) {
                      if (sc->sc_flags & SC_DEBUG)
                          printf("%s: too few input packets!\n", sc->sc_if.if_xname);
                      goto flush;
                  }
              }
              sc->sc_pktc = pkt = PKT_NEXT(pkt);
              PKT_LEN(pkt) = 0;
              sc->sc_pktp = pkt->p_buf;
          }
      
          ++PKT_LEN(pkt);
          *sc->sc_pktp++ = c;
          sc->sc_fcs = PPP_FCS(sc->sc_fcs, c);
          return 0;
      
       flush:
          if (!(sc->sc_flags & SC_FLUSH)) {
              s = spltty();
              sc->sc_if.if_ierrors++;
              sc->sc_stats.ppp_ierrors++;
              sc->sc_flags |= SC_FLUSH;
              splx(s);
              if (sc->sc_flags & SC_LOG_FLUSH)
                  ppplogchar(sc, c);
          }
          return 0;
      }
      
      #define MAX_DUMP_BYTES        128
      
      void
      ppplogchar(struct ppp_softc *sc, int c)
      {
          if (c >= 0)
              sc->sc_rawin[sc->sc_rawin_count++] = c;
          if (sc->sc_rawin_count >= sizeof(sc->sc_rawin)
              || (c < 0 && sc->sc_rawin_count > 0)) {
              printf("%s input: ", sc->sc_if.if_xname);
              pppdumpb(sc->sc_rawin, sc->sc_rawin_count);
              sc->sc_rawin_count = 0;
          }
      }
      
      void
      pppdumpb(u_char *b, int l)
      {
          char buf[3*MAX_DUMP_BYTES+4];
          char *bp = buf;
          static char digits[] = "0123456789abcdef";
      
          while (l--) {
              if (bp >= buf + sizeof(buf) - 3) {
                  *bp++ = '>';
                  break;
              }
              *bp++ = digits[*b >> 4]; /* convert byte to ascii hex */
              *bp++ = digits[*b++ & 0xf];
              *bp++ = ' ';
          }
      
          *bp = 0;
          printf("%s\n", buf);
      }
      
      #endif        /* NPPP > 0 */
      /*        $OpenBSD: uvm_addr.h,v 1.7 2017/01/17 17:19:21 stefan Exp $        */
      
      /*
       * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl>
       *
       * Permission to use, copy, modify, and distribute this software for any
       * purpose with or without fee is hereby granted, provided that the above
       * copyright notice and this permission notice appear in all copies.
       *
       * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
       * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
       * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
       * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
       * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
       * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
       * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
       */
      
      #ifndef _UVM_UVM_ADDR_H_
      #define _UVM_UVM_ADDR_H_
      
      /*
       * Address selection logic.
       *
 * Address selection is just that: selection.  These functions must not
 * modify the map, except for their own state (which is passed in as a
 * uaddr_state pointer).
       */
      
      
      /*
       * UVM address selection base state.
       *
       * Each uvm address algorithm requires these parameters:
       * - lower bound address (page aligned)
       * - upper bound address (page aligned)
       * - function address pointers
       */
      struct uvm_addr_state {
              vaddr_t uaddr_minaddr;
              vaddr_t uaddr_maxaddr;
              const struct uvm_addr_functions *uaddr_functions;
      };
      
      /*
       * This structure describes one algorithm implementation.
       *
       * Each algorithm is described in terms of:
       * - uaddr_select: an address selection algorithm
       * - uaddr_free_insert: a freelist insertion function (optional)
       * - uaddr_free_remove: a freelist deletion function (optional)
       * - uaddr_destroy: a destructor for the algorithm state
       */
      struct uvm_addr_functions {
              int (*uaddr_select)(struct vm_map *map,
                  struct uvm_addr_state *uaddr,
                  struct vm_map_entry **entry_out, vaddr_t *addr_out,
                  vsize_t sz, vaddr_t align, vaddr_t offset,
                  vm_prot_t prot, vaddr_t hint);
              void (*uaddr_free_insert)(struct vm_map *map,
                  struct uvm_addr_state *uaddr_state,
                  struct vm_map_entry *entry);
              void (*uaddr_free_remove)(struct vm_map *map,
                  struct uvm_addr_state *uaddr_state,
                  struct vm_map_entry *entry);
              void (*uaddr_destroy)(struct uvm_addr_state *uaddr_state);
              void (*uaddr_print)(struct uvm_addr_state *uaddr_state, boolean_t full,
                  int (*pr)(const char *, ...));
      
              const char* uaddr_name;                /* Name of the allocator. */
      };
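
/*
 * Illustrative sketch (editor's addition, deliberately disabled): a
 * trivial selector implementing the uaddr_select hook could propose
 * the lowest aligned address in its range and leave finer fit checking
 * to the caller via uvm_addr_fitspace().  The function name below is
 * hypothetical.
 */
#if 0
static int
uaddr_example_select(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry **entry_out, vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset,
    vm_prot_t prot, vaddr_t hint)
{
	vaddr_t va;

	/* Round the lower bound up to the requested alignment. */
	va = uvm_addr_align(uaddr->uaddr_minaddr, align, offset);
	if (va + sz > uaddr->uaddr_maxaddr)
		return (ENOMEM);
	*addr_out = va;
	return (0);
}
#endif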
      
      
      #ifdef _KERNEL
      
      void                         uvm_addr_init(void);
      void                         uvm_addr_destroy(struct uvm_addr_state *);
      vaddr_t                         uvm_addr_align(vaddr_t, vaddr_t, vaddr_t);
      vaddr_t                         uvm_addr_align_back(vaddr_t, vaddr_t, vaddr_t);
      int                         uvm_addr_linsearch(struct vm_map *,
                                  struct uvm_addr_state *, struct vm_map_entry **,
                                  vaddr_t *addr_out, vaddr_t, vsize_t,
                                  vaddr_t, vaddr_t, int, vaddr_t, vaddr_t,
                                  vsize_t, vsize_t);
      int                         uvm_addr_invoke(struct vm_map *,
                                  struct uvm_addr_state *, struct vm_map_entry **,
                                  struct vm_map_entry **, vaddr_t*,
                                  vsize_t, vaddr_t, vaddr_t, vm_prot_t, vaddr_t);
      #if 0
      struct uvm_addr_state        *uaddr_lin_create(vaddr_t, vaddr_t);
      #endif
      struct uvm_addr_state        *uaddr_rnd_create(vaddr_t, vaddr_t);
      #ifndef SMALL_KERNEL
      struct uvm_addr_state        *uaddr_bestfit_create(vaddr_t, vaddr_t);
      struct uvm_addr_state        *uaddr_pivot_create(vaddr_t, vaddr_t);
      struct uvm_addr_state        *uaddr_stack_brk_create(vaddr_t, vaddr_t);
      #endif /* SMALL_KERNEL */
      int                         uvm_addr_fitspace(vaddr_t *, vaddr_t *,
                                  vaddr_t, vaddr_t, vsize_t, vaddr_t, vaddr_t,
                                  vsize_t, vsize_t);
      
      #if defined(DEBUG) || defined(DDB)
      void                         uvm_addr_print(struct uvm_addr_state *, const char *,
                                  boolean_t, int (*pr)(const char *, ...));
      #endif /* DEBUG || DDB */
      
      /*
       * Kernel bootstrap allocator.
       */
      RBT_HEAD(uaddr_free_rbtree, vm_map_entry);
RBT_PROTOTYPE(uaddr_free_rbtree, vm_map_entry, dfree.rbtree,
          uvm_mapent_fspace_cmp);
      
      extern struct uvm_addr_state uaddr_kbootstrap;
      
      #endif /* _KERNEL */
      #endif /* _UVM_UVM_ADDR_H_ */
      /*        $OpenBSD: in6_src.c,v 1.81 2016/12/02 11:16:04 mpi Exp $        */
      /*        $KAME: in6_src.c,v 1.36 2001/02/06 04:08:17 itojun Exp $        */
      
      /*
       * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
       * All rights reserved.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       * 3. Neither the name of the project nor the names of its contributors
       *    may be used to endorse or promote products derived from this software
       *    without specific prior written permission.
       *
       * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
       * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
       * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
       * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
       * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
       * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
       * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
       * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
       * SUCH DAMAGE.
       */
      
      /*
       * Copyright (c) 1982, 1986, 1991, 1993
       *        The Regents of the University of California.  All rights reserved.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       * 3. Neither the name of the University nor the names of its contributors
       *    may be used to endorse or promote products derived from this software
       *    without specific prior written permission.
       *
       * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
       * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
       * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
       * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
       * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
       * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
       * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
       * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
       * SUCH DAMAGE.
       *
       *        @(#)in_pcb.c        8.2 (Berkeley) 1/4/94
       */
      
      #include <sys/param.h>
      #include <sys/systm.h>
      #include <sys/mbuf.h>
      #include <sys/protosw.h>
      #include <sys/socket.h>
      #include <sys/socketvar.h>
      #include <sys/ioctl.h>
      #include <sys/errno.h>
      #include <sys/time.h>
      
      #include <net/if.h>
      #include <net/if_var.h>
      #include <net/route.h>
      
      #include <netinet/in.h>
      #include <netinet/ip.h>
      #include <netinet/in_pcb.h>
      #include <netinet6/in6_var.h>
      #include <netinet/ip6.h>
      #include <netinet6/ip6_var.h>
      #include <netinet6/nd6.h>
      
      int in6_selectif(struct sockaddr_in6 *, struct ip6_pktopts *,
          struct ip6_moptions *, struct route_in6 *, struct ifnet **, u_int);
      
      /*
 * Return the IPv6 address that is most appropriate for a given
 * destination and pcb.  We need the additional opts parameter because
 * the values set at the pcb level can be overridden via cmsg.
       */
      int
      in6_pcbselsrc(struct in6_addr **in6src, struct sockaddr_in6 *dstsock,
          struct inpcb *inp, struct ip6_pktopts *opts)
{
              struct ip6_moptions *mopts = inp->inp_moptions6;
              struct route_in6 *ro = &inp->inp_route6;
              struct in6_addr *laddr = &inp->inp_laddr6;
              u_int rtableid = inp->inp_rtableid;
              struct ifnet *ifp = NULL;
              struct in6_addr *dst;
              struct in6_ifaddr *ia6 = NULL;
              struct in6_pktinfo *pi = NULL;
              int        error;
      
              dst = &dstsock->sin6_addr;
      
              /*
               * If the source address is explicitly specified by the caller,
               * check if the requested source address is indeed a unicast address
               * assigned to the node, and can be used as the packet's source
               * address.  If everything is okay, use the address as source.
               */
        if (opts && (pi = opts->ip6po_pktinfo) &&
            !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) {
                      struct sockaddr_in6 sa6;
      
                      /* get the outgoing interface */
                      error = in6_selectif(dstsock, opts, mopts, ro, &ifp, rtableid);
                if (error)
                              return (error);
      
                      bzero(&sa6, sizeof(sa6));
                      sa6.sin6_family = AF_INET6;
                      sa6.sin6_len = sizeof(sa6);
                      sa6.sin6_addr = pi->ipi6_addr;
      
                if (ifp && IN6_IS_SCOPE_EMBED(&sa6.sin6_addr))
                              sa6.sin6_addr.s6_addr16[1] = htons(ifp->if_index);
                      if_put(ifp); /* put reference from in6_selectif */
      
                      ia6 = ifatoia6(ifa_ifwithaddr(sin6tosa(&sa6), rtableid));
                if (ia6 == NULL || (ia6->ia6_flags &
                           (IN6_IFF_ANYCAST|IN6_IFF_TENTATIVE|IN6_IFF_DUPLICATED)))
                              return (EADDRNOTAVAIL);
      
                pi->ipi6_addr = sa6.sin6_addr; /* XXX: this overrides pi */
      
                      *in6src = &pi->ipi6_addr;
                      return (0);
              }
      
              /*
         * If the source address is not specified but the socket (if any)
               * is already bound, use the bound address.
               */
        if (laddr && !IN6_IS_ADDR_UNSPECIFIED(laddr)) {
                      *in6src = laddr;
                      return (0);
              }
      
              /*
               * If the caller doesn't specify the source address but
               * the outgoing interface, use an address associated with
               * the interface.
               */
        if (pi && pi->ipi6_ifindex) {
                      ifp = if_get(pi->ipi6_ifindex);
                      if (ifp == NULL)
                              return (ENXIO); /* XXX: better error? */
      
                      ia6 = in6_ifawithscope(ifp, dst, rtableid);
                      if_put(ifp);
      
                if (ia6 == NULL)
                              return (EADDRNOTAVAIL);
      
                *in6src = &ia6->ia_addr.sin6_addr;
                      return (0);
              }
      
              error = in6_selectsrc(in6src, dstsock, mopts, rtableid);
        if (error != EADDRNOTAVAIL)
                      return (error);
      
              /*
               * If route is known or can be allocated now,
               * our src addr is taken from the i/f, else punt.
               */
        if (!rtisvalid(ro->ro_rt) || (ro->ro_tableid != rtableid) ||
                  !IN6_ARE_ADDR_EQUAL(&ro->ro_dst.sin6_addr, dst)) {
                      rtfree(ro->ro_rt);
                      ro->ro_rt = NULL;
              }
        if (ro->ro_rt == NULL) {
                      struct sockaddr_in6 *sa6;
      
                      /* No route yet, so try to acquire one */
                      bzero(&ro->ro_dst, sizeof(struct sockaddr_in6));
                ro->ro_tableid = rtableid;
                      sa6 = &ro->ro_dst;
                      sa6->sin6_family = AF_INET6;
                      sa6->sin6_len = sizeof(struct sockaddr_in6);
                      sa6->sin6_addr = *dst;
                      sa6->sin6_scope_id = dstsock->sin6_scope_id;
                      ro->ro_rt = rtalloc(sin6tosa(&ro->ro_dst),
                          RT_RESOLVE, ro->ro_tableid);
              }
      
              /*
         * in_pcbconnect() checks IFF_LOOPBACK and skips such addresses,
         * though it is not clear why it does so.  Here the scope must
         * be ensured even for lo0, so IFF_LOOPBACK is deliberately not
         * checked.
               */
      
        if (ro->ro_rt) {
                      ifp = if_get(ro->ro_rt->rt_ifidx);
                      if (ifp != NULL) {
                              ia6 = in6_ifawithscope(ifp, dst, rtableid);
                              if_put(ifp);
                      }
                if (ia6 == NULL) /* xxx scope error ?*/
                              ia6 = ifatoia6(ro->ro_rt->rt_ifa);
              }
        if (ia6 == NULL)
                      return (EHOSTUNREACH);        /* no route */
      
              *in6src = &ia6->ia_addr.sin6_addr;
              return (0);
      }
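
/*
 * Editor's illustration (userland, not kernel code): the opts path at
 * the top of in6_pcbselsrc() is what a process exercises when it pins
 * the source address with an IPV6_PKTINFO control message, e.g. by
 * attaching a struct in6_pktinfo whose ipi6_addr holds the desired
 * source to the msghdr passed to sendmsg(2) (cmsg level IPPROTO_IPV6,
 * type IPV6_PKTINFO).  The kernel then verifies that the requested
 * address is a usable unicast address on the node before accepting it.
 */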
      
      /*
 * Return the IPv6 address that is most appropriate for a given
 * destination and multicast options.
 * If necessary, this function looks up the routing table and returns
       * an entry to the caller for later use.
       */
      int
      in6_selectsrc(struct in6_addr **in6src, struct sockaddr_in6 *dstsock,
          struct ip6_moptions *mopts, unsigned int rtableid)
{
              struct ifnet *ifp = NULL;
              struct in6_addr *dst;
              struct in6_ifaddr *ia6 = NULL;
      
              dst = &dstsock->sin6_addr;
      
              /*
               * If the destination address is a link-local unicast address or
               * a link/interface-local multicast address, and if the outgoing
         * interface is specified by the sin6_scope_id field, use an address
               * associated with the interface.
               * XXX: We're now trying to define more specific semantics of
               *      sin6_scope_id field, so this part will be rewritten in
               *      the near future.
               */
        if ((IN6_IS_ADDR_LINKLOCAL(dst) || IN6_IS_ADDR_MC_LINKLOCAL(dst) ||
                   IN6_IS_ADDR_MC_INTFACELOCAL(dst)) && dstsock->sin6_scope_id) {
                      ifp = if_get(dstsock->sin6_scope_id);
                      if (ifp == NULL)
                              return (ENXIO); /* XXX: better error? */
      
                      ia6 = in6_ifawithscope(ifp, dst, rtableid);
                      if_put(ifp);
      
                if (ia6 == NULL)
                              return (EADDRNOTAVAIL);
      
                      *in6src = &ia6->ia_addr.sin6_addr;
                      return (0);
              }
      
              /*
               * If the destination address is a multicast address and
               * the outgoing interface for the address is specified
               * by the caller, use an address associated with the interface.
               * Even if the outgoing interface is not specified, we also
               * choose a loopback interface as the outgoing interface.
               */
        if (IN6_IS_ADDR_MULTICAST(dst)) {
                ifp = mopts ? if_get(mopts->im6o_ifidx) : NULL;
      
                if (!ifp && dstsock->sin6_scope_id)
                              ifp = if_get(htons(dstsock->sin6_scope_id));
      
                if (ifp) {
                              ia6 = in6_ifawithscope(ifp, dst, rtableid);
                              if_put(ifp);
      
                        if (ia6 == NULL)
                                      return (EADDRNOTAVAIL);
      
                        *in6src = &ia6->ia_addr.sin6_addr;
                              return (0);
                      }
              }
      
              return (EADDRNOTAVAIL);
      }
      
      struct rtentry *
      in6_selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
          struct route_in6 *ro, unsigned int rtableid)
{
              struct in6_addr *dst;
      
              dst = &dstsock->sin6_addr;
      
              /*
               * Use a cached route if it exists and is valid, else try to allocate
               * a new one.
               */
              if (ro) {
                if (rtisvalid(ro->ro_rt))
                        KASSERT(sin6tosa(&ro->ro_dst)->sa_family == AF_INET6);
                if (!rtisvalid(ro->ro_rt) ||
                          !IN6_ARE_ADDR_EQUAL(&ro->ro_dst.sin6_addr, dst)) {
                              rtfree(ro->ro_rt);
                              ro->ro_rt = NULL;
                      }
   55                 if (ro->ro_rt == NULL) {
                              struct sockaddr_in6 *sa6;
      
                              /* No route yet, so try to acquire one */
                              bzero(&ro->ro_dst, sizeof(struct sockaddr_in6));
                              ro->ro_tableid = rtableid;
                              sa6 = &ro->ro_dst;
                              *sa6 = *dstsock;
                              sa6->sin6_scope_id = 0;
                              ro->ro_rt = rtalloc_mpath(sin6tosa(&ro->ro_dst),
                                  NULL, ro->ro_tableid);
                      }
      
                      /*
                       * Check if the outgoing interface conflicts with
                       * the interface specified by ipi6_ifindex (if specified).
		 * Note that the loopback interface is always okay.
                       * (this may happen when we are sending a packet to one of
                       *  our own addresses.)
                       */
		if (opts && opts->ip6po_pktinfo &&
		    opts->ip6po_pktinfo->ipi6_ifindex) {
			if (ro->ro_rt != NULL &&
			    !ISSET(ro->ro_rt->rt_flags, RTF_LOCAL) &&
                                  ro->ro_rt->rt_ifidx !=
                                  opts->ip6po_pktinfo->ipi6_ifindex) {
                                          return (NULL);
                              }
                      }
      
                      return (ro->ro_rt);
              }
      
              return (NULL);
      }
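
/*
 * Illustrative sketch (not part of the original source): the cached
 * route_in6 usage pattern in6_selectroute() expects.  A caller keeps a
 * struct route_in6 across packets; the helper revalidates or refills it.
 * The names here are hypothetical.
 */
#if 0
static int
example_route_packet(struct sockaddr_in6 *dst, struct route_in6 *ro,
    unsigned int rtableid)
{
	struct rtentry *rt;

	/* NULL opts: no sticky IPV6_PKTINFO interface to honor. */
	rt = in6_selectroute(dst, NULL, ro, rtableid);
	if (rt == NULL)
		return (EHOSTUNREACH);
	/* ro->ro_rt stays cached; rtfree() it when the pcb goes away. */
	return (0);
}
#endif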
      
      int
      in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
          struct ip6_moptions *mopts, struct route_in6 *ro, struct ifnet **retifp,
          u_int rtableid)
{
              struct rtentry *rt = NULL;
              struct in6_pktinfo *pi = NULL;
      
              /* If the caller specify the outgoing interface explicitly, use it. */
	if (opts && (pi = opts->ip6po_pktinfo) != NULL && pi->ipi6_ifindex) {
		*retifp = if_get(pi->ipi6_ifindex);
		if (*retifp != NULL)
                              return (0);
              }
      
              /*
               * If the destination address is a multicast address and the outgoing
               * interface for the address is specified by the caller, use it.
               */
	if (IN6_IS_ADDR_MULTICAST(&dstsock->sin6_addr) &&
	    mopts != NULL && (*retifp = if_get(mopts->im6o_ifidx)) != NULL)
		return (0);

	rt = in6_selectroute(dstsock, opts, ro, rtableid);
	if (rt == NULL)
                      return (EHOSTUNREACH);
      
              /*
               * do not use a rejected or black hole route.
               * XXX: this check should be done in the L2 output routine.
               * However, if we skipped this check here, we'd see the following
               * scenario:
               * - install a rejected route for a scoped address prefix
               *   (like fe80::/10)
               * - send a packet to a destination that matches the scoped prefix,
               *   with ambiguity about the scope zone.
               * - pick the outgoing interface from the route, and disambiguate the
               *   scope zone with the interface.
               * - ip6_output() would try to get another route with the "new"
               *   destination, which may be valid.
               * - we'd see no error on output.
	 * Although this may not be very harmful, it is still confusing.
               * We thus reject the case here.
               */
              if (rt && (rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE)))
		return (rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);

	if (rt != NULL)
		*retifp = if_get(rt->rt_ifidx);
      
              return (0);
      }
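
/*
 * Illustrative sketch (not part of the original source): calling
 * in6_selectif() and releasing the interface reference it returns.
 * The precedence above is: explicit pktinfo index, then the multicast
 * option interface, then the interface of the selected route.
 */
#if 0
static int
example_pick_ifp(struct sockaddr_in6 *dst, struct route_in6 *ro,
    unsigned int rtableid)
{
	struct ifnet *ifp = NULL;
	int error;

	error = in6_selectif(dst, NULL, NULL, ro, &ifp, rtableid);
	if (error)
		return (error);
	/* ... use ifp ... */
	if_put(ifp);		/* drop the reference taken by in6_selectif() */
	return (0);
}
#endif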
      
      int
      in6_selecthlim(struct inpcb *in6p)
{
	if (in6p && in6p->inp_hops >= 0)
                      return (in6p->inp_hops);
      
              return (ip6_defhlim);
      }
      
      /*
       * generate kernel-internal form (scopeid embedded into s6_addr16[1]).
 * If the address scope is link-local, embed the interface index in the
       * address.  The routine determines our precedence
       * between advanced API scope/interface specification and basic API
       * specification.
       *
 * this function should be nuked in the future, when we get rid of the
 * embedded scopeid thing.
       *
       * XXX actually, it is over-specification to return ifp against sin6_scope_id.
       * there can be multiple interfaces that belong to a particular scope zone
       * (in specification, we have 1:N mapping between a scope zone and interfaces).
       * we may want to change the function to return something other than ifp.
       */
      int
      in6_embedscope(struct in6_addr *in6, const struct sockaddr_in6 *sin6,
          struct inpcb *in6p)
{
              struct ifnet *ifp = NULL;
              u_int32_t scopeid;
      
              *in6 = sin6->sin6_addr;
              scopeid = sin6->sin6_scope_id;
      
              /*
               * don't try to read sin6->sin6_addr beyond here, since the caller may
               * ask us to overwrite existing sockaddr_in6
               */
      
	if (IN6_IS_SCOPE_EMBED(in6)) {
                      struct in6_pktinfo *pi;
      
                      /*
                       * KAME assumption: link id == interface id
                       */
      
		if (in6p && in6p->inp_outputopts6 &&
		    (pi = in6p->inp_outputopts6->ip6po_pktinfo) &&
		    pi->ipi6_ifindex) {
			ifp = if_get(pi->ipi6_ifindex);
			if (ifp == NULL)
				return ENXIO;  /* XXX EINVAL? */
			in6->s6_addr16[1] = htons(pi->ipi6_ifindex);
		} else if (in6p && IN6_IS_ADDR_MULTICAST(in6) &&
		    in6p->inp_moptions6 &&
		    (ifp = if_get(in6p->inp_moptions6->im6o_ifidx))) {
			in6->s6_addr16[1] = htons(ifp->if_index);
		} else if (scopeid) {
			ifp = if_get(scopeid);
			if (ifp == NULL)
				return ENXIO;  /* XXX EINVAL? */
			/* XXX assignment to 16bit from 32bit variable */
			in6->s6_addr16[1] = htons(scopeid & 0xffff);
                      }
                      if_put(ifp);
              }
      
              return 0;
      }
      
      /*
       * generate standard sockaddr_in6 from embedded form.
       * touches sin6_addr and sin6_scope_id only.
       *
 * this function should be nuked in the future, when we get rid of the
 * embedded scopeid thing.
       */
      void
      in6_recoverscope(struct sockaddr_in6 *sin6, const struct in6_addr *in6)
{
              u_int32_t scopeid;
      
              sin6->sin6_addr = *in6;
      
              /*
               * don't try to read *in6 beyond here, since the caller may
               * ask us to overwrite existing sockaddr_in6
               */
      
              sin6->sin6_scope_id = 0;
	if (IN6_IS_SCOPE_EMBED(in6)) {
                      /*
                       * KAME assumption: link id == interface id
                       */
		scopeid = ntohs(sin6->sin6_addr.s6_addr16[1]);
		if (scopeid) {
                              sin6->sin6_addr.s6_addr16[1] = 0;
                              sin6->sin6_scope_id = scopeid;
                      }
              }
      }
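
/*
 * Illustrative sketch (not part of the original source): the embed /
 * recover round trip for a link-local destination.  With interface
 * index 3, fe80::1 becomes fe80:3::1 in kernel-internal form (the
 * index lands in s6_addr16[1]) and in6_recoverscope() moves it back
 * into sin6_scope_id.
 */
#if 0
static void
example_scope_roundtrip(struct sockaddr_in6 *sin6)
{
	struct in6_addr kform;

	/* user form: fe80::1 with sin6->sin6_scope_id = 3 */
	if (in6_embedscope(&kform, sin6, NULL) == 0) {
		/* kform.s6_addr16[1] == htons(3) */
		in6_recoverscope(sin6, &kform);
		/* sin6->sin6_scope_id == 3, address is fe80::1 again */
	}
}
#endif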
      
      /*
 * just clear the embedded scope identifier.
       */
      void
      in6_clearscope(struct in6_addr *addr)
      {
              if (IN6_IS_SCOPE_EMBED(addr))
                      addr->s6_addr16[1] = 0;
      }
      /*        $OpenBSD: lapic.c,v 1.55 2019/08/03 14:57:51 jcs Exp $        */
      /* $NetBSD: lapic.c,v 1.2 2003/05/08 01:04:35 fvdl Exp $ */
      
      /*-
       * Copyright (c) 2000 The NetBSD Foundation, Inc.
       * All rights reserved.
       *
       * This code is derived from software contributed to The NetBSD Foundation
       * by RedBack Networks Inc.
       *
       * Author: Bill Sommerfeld
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       *
       * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
       * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
       * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
       * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
       * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
       * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
       * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
       * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
       * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
       * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
       * POSSIBILITY OF SUCH DAMAGE.
       */
      
      #include <sys/param.h>
      #include <sys/systm.h>
      #include <sys/device.h>
      
      #include <uvm/uvm_extern.h>
      
      #include <machine/codepatch.h>
      #include <machine/cpu.h>
      #include <machine/cpufunc.h>
      #include <machine/pmap.h>
      #include <machine/mpbiosvar.h>
      #include <machine/specialreg.h>
      #include <machine/segments.h>
      
      #include <machine/i82489reg.h>
      #include <machine/i82489var.h>
      
      #include <dev/ic/i8253reg.h>
      
      #include "ioapic.h"
      #include "xen.h"
      #include "hyperv.h"
      
      #if NIOAPIC > 0
      #include <machine/i82093var.h>
      #endif
      
      /* #define LAPIC_DEBUG */
      
      #ifdef LAPIC_DEBUG
      #define DPRINTF(x...)        do { printf(x); } while(0)
      #else
      #define DPRINTF(x...)
      #endif /* LAPIC_DEBUG */
      
      struct evcount clk_count;
      #ifdef MULTIPROCESSOR
      struct evcount ipi_count;
      #endif
      
      void        lapic_delay(int);
      static u_int32_t lapic_gettick(void);
      void        lapic_clockintr(void *, struct intrframe);
      void        lapic_initclocks(void);
      void        lapic_map(paddr_t);
      
      void lapic_hwmask(struct pic *, int);
      void lapic_hwunmask(struct pic *, int);
      void lapic_setup(struct pic *, struct cpu_info *, int, int, int);
      
      extern char idt_allocmap[];
      
      struct pic local_pic = {
              {0, {NULL}, NULL, 0, "lapic", NULL, 0, 0},
              PIC_LAPIC,
      #ifdef MULTIPROCESSOR
              {},
      #endif
              lapic_hwmask,
              lapic_hwunmask,
              lapic_setup,
              lapic_setup,
      };
      
      extern int x2apic_eoi;
      int x2apic_enabled = 0;
      
      u_int32_t x2apic_readreg(int reg);
      u_int32_t x2apic_cpu_number(void);
      void x2apic_writereg(int reg, u_int32_t val);
      void x2apic_ipi(int vec, int target, int dl);
      
      u_int32_t i82489_readreg(int reg);
      u_int32_t i82489_cpu_number(void);
      void i82489_writereg(int reg, u_int32_t val);
      void i82489_ipi(int vec, int target, int dl);
      
      u_int32_t (*lapic_readreg)(int)                        = i82489_readreg;
      void (*lapic_writereg)(int, u_int32_t)                = i82489_writereg;
      #ifdef MULTIPROCESSOR
      void (*x86_ipi)(int vec, int target, int dl)        = i82489_ipi;
      #endif
      
      u_int32_t
      i82489_readreg(int reg)
      {
              return *((volatile u_int32_t *)(((volatile u_int8_t *)local_apic)
                  + reg));
      }
      
      u_int32_t
      i82489_cpu_number(void)
      {
              return i82489_readreg(LAPIC_ID) >> LAPIC_ID_SHIFT;
      }
      
      void
      i82489_writereg(int reg, u_int32_t val)
      {
              *((volatile u_int32_t *)(((volatile u_int8_t *)local_apic) + reg)) =
                  val;
      }
      
      u_int32_t
      x2apic_readreg(int reg)
{
              return rdmsr(MSR_X2APIC_BASE + (reg >> 4));
      }
      
      u_int32_t
      x2apic_cpu_number(void)
      {
              return x2apic_readreg(LAPIC_ID) & X2APIC_ID_MASK;
      }
      
      void
      x2apic_writereg(int reg, u_int32_t val)
      {
              wrmsr(MSR_X2APIC_BASE + (reg >> 4), val);
      }
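
/*
 * Illustrative note (not part of the original source): in x2APIC mode
 * the MMIO registers are exposed as MSRs at MSR_X2APIC_BASE (0x800)
 * plus the MMIO offset divided by 16, hence the "reg >> 4" above.
 * For example, the ID register at MMIO offset 0x20 becomes MSR 0x802.
 */
#if 0
static u_int32_t
example_x2apic_id(void)
{
	/* LAPIC_ID is the 0x20 MMIO offset; 0x20 >> 4 == 0x2 */
	return rdmsr(MSR_X2APIC_BASE + (LAPIC_ID >> 4)) & X2APIC_ID_MASK;
}
#endif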
      
      #ifdef MULTIPROCESSOR
      static inline void
      x2apic_writeicr(u_int32_t hi, u_int32_t lo)
      {
              u_int32_t msr = MSR_X2APIC_BASE + (LAPIC_ICRLO >> 4);
              __asm volatile("wrmsr" : : "a" (lo), "d" (hi), "c" (msr));
      }
      #endif
      
      u_int32_t
      lapic_cpu_number(void)
      {
              if (x2apic_enabled)
                      return x2apic_cpu_number();
              return i82489_cpu_number();
      }
      
      
      void
      lapic_map(paddr_t lapic_base)
      {
              pt_entry_t *pte;
              vaddr_t va;
              u_int64_t msr;
              u_long s;
              int tpr;
      
              s = intr_disable();
              tpr = lapic_tpr;
      
              msr = rdmsr(MSR_APICBASE);
      
              if (ISSET(msr, APICBASE_ENABLE_X2APIC) ||
                  (ISSET(cpu_ecxfeature, CPUIDECX_HV) &&
                  ISSET(cpu_ecxfeature, CPUIDECX_X2APIC))) {
                       /*
                        * On real hardware, x2apic must only be enabled if interrupt
                        * remapping is also enabled. See 10.12.7 of the SDM vol 3.
                        * On hypervisors, this is not necessary. Hypervisors can
                        * implement x2apic support even if the host CPU does not
                        * support it.  Until we support interrupt remapping, use
                        * x2apic only if the hypervisor flag is also set or it is
                        * enabled by BIOS.
                        */
                      if (!ISSET(msr, APICBASE_ENABLE_X2APIC)) {
                              msr |= APICBASE_ENABLE_X2APIC;
                              wrmsr(MSR_APICBASE, msr);
                      }
                      lapic_readreg = x2apic_readreg;
                      lapic_writereg = x2apic_writereg;
      #ifdef MULTIPROCESSOR
                      x86_ipi = x2apic_ipi;
      #endif
                      x2apic_enabled = 1;
                      codepatch_call(CPTAG_EOI, &x2apic_eoi);
      
                      lapic_writereg(LAPIC_TPRI, tpr);
                      va = (vaddr_t)&local_apic;
              } else {
                      /*
                       * Map local apic.  If we have a local apic, it's safe to
                       * assume we're on a 486 or better and can use invlpg and
                       * non-cacheable PTEs
                       *
                       * Whap the PTE "by hand" rather than calling pmap_kenter_pa
                       * because the latter will attempt to invoke TLB shootdown
                       * code just as we might have changed the value of
                       * cpu_number()..
                       */
                      va = (vaddr_t)&local_apic;
                      pte = kvtopte(va);
                      *pte = lapic_base | PG_RW | PG_V | PG_N | PG_G | pg_nx;
                      invlpg(va);
      
                      lapic_tpr = tpr;
              }
      
              /*
               * Enter the LAPIC MMIO page in the U-K page table for handling
               * Meltdown (needed in the interrupt stub to acknowledge the
               * incoming interrupt). On CPUs unaffected by Meltdown,
               * pmap_enter_special is a no-op.
               */
              pmap_enter_special(va, lapic_base, PROT_READ | PROT_WRITE);
              DPRINTF("%s: entered lapic page va 0x%llx pa 0x%llx\n", __func__,
                  (uint64_t)va, (uint64_t)lapic_base);
      
              intr_restore(s);
      }
      
      /*
       * enable local apic
       */
      void
      lapic_enable(void)
      {
              lapic_writereg(LAPIC_SVR, LAPIC_SVR_ENABLE | LAPIC_SPURIOUS_VECTOR);
      }
      
      void
      lapic_disable(void)
      {
              lapic_writereg(LAPIC_SVR, 0);
      }
      
      void
      lapic_set_lvt(void)
      {
              struct cpu_info *ci = curcpu();
              int i;
              struct mp_intr_map *mpi;
              uint32_t lint0;
      
      #ifdef MULTIPROCESSOR
              if (mp_verbose) {
                      apic_format_redir(ci->ci_dev->dv_xname, "prelint", 0, 0,
                          lapic_readreg(LAPIC_LVINT0));
                      apic_format_redir(ci->ci_dev->dv_xname, "prelint", 1, 0,
                          lapic_readreg(LAPIC_LVINT1));
              }
      #endif
      
      #if NIOAPIC > 0
              /*
               * Disable ExtINT by default when using I/O APICs.
               */
              if (nioapics > 0) {
                      lint0 = lapic_readreg(LAPIC_LVINT0);
                      lint0 |= LAPIC_LVT_MASKED;
                      lapic_writereg(LAPIC_LVINT0, lint0);
              }
      #endif
      
              if (strcmp(cpu_vendor, "AuthenticAMD") == 0) {
                      /*
		 * Detect the presence of C1E capability, mostly on recent
		 * dual-core (and later) K8-family CPUs. This mis-feature renders
                       * the local APIC timer dead, so we disable it by reading
                       * the Interrupt Pending Message register and clearing both
                       * C1eOnCmpHalt (bit 28) and SmiOnCmpHalt (bit 27).
                       * 
                       * Reference:
                       *   "BIOS and Kernel Developer's Guide for AMD NPT
                       *    Family 0Fh Processors"
                       *   #32559 revision 3.00
                       */
                      if ((cpu_id & 0x00000f00) == 0x00000f00 &&
                          (cpu_id & 0x0fff0000) >= 0x00040000) {
                              uint64_t msr;
      
                              msr = rdmsr(MSR_INT_PEN_MSG);
                              if (msr & (IPM_C1E_CMP_HLT|IPM_SMI_CMP_HLT)) {
                                      msr &= ~(IPM_C1E_CMP_HLT|IPM_SMI_CMP_HLT);
                                      wrmsr(MSR_INT_PEN_MSG, msr);
                              }
                      }
              }
      
              for (i = 0; i < mp_nintrs; i++) {
                      mpi = &mp_intrs[i];
                      if (mpi->ioapic == NULL && (mpi->cpu_id == MPS_ALL_APICS
                                                  || mpi->cpu_id == ci->ci_apicid)) {
      #ifdef DIAGNOSTIC
                              if (mpi->ioapic_pin > 1)
                                      panic("lapic_set_lvt: bad pin value %d",
                                          mpi->ioapic_pin);
      #endif
                              if (mpi->ioapic_pin == 0)
                                      lapic_writereg(LAPIC_LVINT0, mpi->redir);
                              else
                                      lapic_writereg(LAPIC_LVINT1, mpi->redir);
                      }
              }
      
      #ifdef MULTIPROCESSOR
              if (mp_verbose) {
                      apic_format_redir(ci->ci_dev->dv_xname, "timer", 0, 0,
                          lapic_readreg(LAPIC_LVTT));
                      apic_format_redir(ci->ci_dev->dv_xname, "pcint", 0, 0,
                          lapic_readreg(LAPIC_PCINT));
                      apic_format_redir(ci->ci_dev->dv_xname, "lint", 0, 0,
                          lapic_readreg(LAPIC_LVINT0));
                      apic_format_redir(ci->ci_dev->dv_xname, "lint", 1, 0,
                          lapic_readreg(LAPIC_LVINT1));
                      apic_format_redir(ci->ci_dev->dv_xname, "err", 0, 0,
                          lapic_readreg(LAPIC_LVERR));
              }
      #endif
      }
      
      /*
       * Initialize fixed idt vectors for use by local apic.
       */
      void
      lapic_boot_init(paddr_t lapic_base)
      {
              static u_int64_t clk_irq = 0;
      #ifdef MULTIPROCESSOR
              static u_int64_t ipi_irq = 0;
      #endif
      
              lapic_map(lapic_base);
      
      #ifdef MULTIPROCESSOR
              idt_allocmap[LAPIC_IPI_VECTOR] = 1;
              idt_vec_set(LAPIC_IPI_VECTOR, Xintr_lapic_ipi);
              idt_allocmap[LAPIC_IPI_INVLTLB] = 1;
              idt_allocmap[LAPIC_IPI_INVLPG] = 1;
              idt_allocmap[LAPIC_IPI_INVLRANGE] = 1;
              if (!pmap_use_pcid) {
                      idt_vec_set(LAPIC_IPI_INVLTLB, Xipi_invltlb);
                      idt_vec_set(LAPIC_IPI_INVLPG, Xipi_invlpg);
                      idt_vec_set(LAPIC_IPI_INVLRANGE, Xipi_invlrange);
              } else {
                      idt_vec_set(LAPIC_IPI_INVLTLB, Xipi_invltlb_pcid);
                      idt_vec_set(LAPIC_IPI_INVLPG, Xipi_invlpg_pcid);
                      idt_vec_set(LAPIC_IPI_INVLRANGE, Xipi_invlrange_pcid);
              }
      #endif
              idt_allocmap[LAPIC_SPURIOUS_VECTOR] = 1;
              idt_vec_set(LAPIC_SPURIOUS_VECTOR, Xintrspurious);
      
              idt_allocmap[LAPIC_TIMER_VECTOR] = 1;
              idt_vec_set(LAPIC_TIMER_VECTOR, Xintr_lapic_ltimer);
      
      #if NXEN > 0
              /* Xen HVM Event Channel Interrupt Vector */
              idt_allocmap[LAPIC_XEN_VECTOR] = 1;
              idt_vec_set(LAPIC_XEN_VECTOR, Xintr_xen_upcall);
      #endif
      #if NHYPERV > 0
              /* Hyper-V Interrupt Vector */
              idt_allocmap[LAPIC_HYPERV_VECTOR] = 1;
              idt_vec_set(LAPIC_HYPERV_VECTOR, Xintr_hyperv_upcall);
      #endif
      
              evcount_attach(&clk_count, "clock", &clk_irq);
      #ifdef MULTIPROCESSOR
              evcount_attach(&ipi_count, "ipi", &ipi_irq);
      #endif
      }
      
      static __inline u_int32_t
      lapic_gettick(void)
      {
              return lapic_readreg(LAPIC_CCR_TIMER);
      }
      
      #include <sys/kernel.h>                /* for hz */
      
      u_int32_t lapic_tval;
      
      /*
       * this gets us up to a 4GHz busclock....
       */
      u_int32_t lapic_per_second = 0;
      u_int32_t lapic_frac_usec_per_cycle;
      u_int64_t lapic_frac_cycle_per_usec;
      u_int32_t lapic_delaytab[26];
      
      void
      lapic_clockintr(void *arg, struct intrframe frame)
      {
              struct cpu_info *ci = curcpu();
              int floor;
      
              floor = ci->ci_handled_intr_level;
              ci->ci_handled_intr_level = ci->ci_ilevel;
              hardclock((struct clockframe *)&frame);
              ci->ci_handled_intr_level = floor;
      
              clk_count.ec_count++;
      }
      
      void
      lapic_startclock(void)
      {
              /*
               * Start local apic countdown timer running, in repeated mode.
               *
               * Mask the clock interrupt and set mode,
               * then set divisor,
               * then unmask and set the vector.
               */
              lapic_writereg(LAPIC_LVTT, LAPIC_LVTT_TM|LAPIC_LVTT_M);
              lapic_writereg(LAPIC_DCR_TIMER, LAPIC_DCRT_DIV1);
              lapic_writereg(LAPIC_ICR_TIMER, lapic_tval);
              lapic_writereg(LAPIC_LVTT, LAPIC_LVTT_TM|LAPIC_TIMER_VECTOR);
      }
      
      void
      lapic_initclocks(void)
      {
              lapic_startclock();
      
              i8254_inittimecounter_simple();
      }
      
      
      extern int gettick(void);        /* XXX put in header file */
      extern u_long rtclock_tval; /* XXX put in header file */
      
      static __inline void
      wait_next_cycle(void)
      {
              unsigned int tick, tlast;
      
              tlast = (1 << 16);        /* i8254 counter has 16 bits at most */
              for (;;) {
                      tick = gettick();
                      if (tick > tlast)
                              return;
                      tlast = tick;
              }
      }
      
      /*
       * Calibrate the local apic count-down timer (which is running at
       * bus-clock speed) vs. the i8254 counter/timer (which is running at
       * a fixed rate).
       *
       * The Intel MP spec says: "An MP operating system may use the IRQ8
       * real-time clock as a reference to determine the actual APIC timer clock
       * speed."
       *
       * We're actually using the IRQ0 timer.  Hmm.
       */
      void
      lapic_calibrate_timer(struct cpu_info *ci)
      {
              unsigned int startapic, endapic;
              u_int64_t dtick, dapic, tmp;
              u_long s;
              int i;
      
              if (lapic_per_second)
                      goto skip_calibration;
      
              if (mp_verbose)
                      printf("%s: calibrating local timer\n", ci->ci_dev->dv_xname);
      
              /*
               * Configure timer to one-shot, interrupt masked,
               * large positive number.
               */
              lapic_writereg(LAPIC_LVTT, LAPIC_LVTT_M);
              lapic_writereg(LAPIC_DCR_TIMER, LAPIC_DCRT_DIV1);
              lapic_writereg(LAPIC_ICR_TIMER, 0x80000000);
      
              s = intr_disable();
      
              /* wait for current cycle to finish */
              wait_next_cycle();
      
              startapic = lapic_gettick();
      
              /* wait the next hz cycles */
              for (i = 0; i < hz; i++)
                      wait_next_cycle();
      
              endapic = lapic_gettick();
      
              intr_restore(s);
      
              dtick = hz * rtclock_tval;
              dapic = startapic-endapic;
      
              /*
               * there are TIMER_FREQ ticks per second.
               * in dtick ticks, there are dapic bus clocks.
               */
              tmp = (TIMER_FREQ * dapic) / dtick;
      
              lapic_per_second = tmp;
      
      skip_calibration:
              printf("%s: apic clock running at %dMHz\n",
                  ci->ci_dev->dv_xname, lapic_per_second / (1000 * 1000));
      
              if (lapic_per_second != 0) {
                      /*
                       * reprogram the apic timer to run in periodic mode.
                       * XXX need to program timer on other cpu's, too.
                       */
                      lapic_tval = (lapic_per_second * 2) / hz;
                      lapic_tval = (lapic_tval / 2) + (lapic_tval & 0x1);
      
                      lapic_writereg(LAPIC_LVTT, LAPIC_LVTT_TM | LAPIC_LVTT_M |
                          LAPIC_TIMER_VECTOR);
                      lapic_writereg(LAPIC_DCR_TIMER, LAPIC_DCRT_DIV1);
                      lapic_writereg(LAPIC_ICR_TIMER, lapic_tval);
      
                      /*
                       * Compute fixed-point ratios between cycles and
                       * microseconds to avoid having to do any division
                       * in lapic_delay.
                       */
      
                      tmp = (1000000 * (u_int64_t)1 << 32) / lapic_per_second;
                      lapic_frac_usec_per_cycle = tmp;
      
                      tmp = (lapic_per_second * (u_int64_t)1 << 32) / 1000000;
      
                      lapic_frac_cycle_per_usec = tmp;
      
                      /*
                       * Compute delay in cycles for likely short delays in usec.
                       */
                      for (i = 0; i < 26; i++)
                              lapic_delaytab[i] = (lapic_frac_cycle_per_usec * i) >>
                                  32;
      
                      /*
                       * Now that the timer's calibrated, use the apic timer routines
                       * for all our timing needs..
                       */
                      delay_func = lapic_delay;
                      initclock_func = lapic_initclocks;
              }
      }
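
/*
 * Illustrative sketch (not part of the original source): the fixed-point
 * conversion computed above.  With a 1 GHz bus clock,
 * lapic_frac_cycle_per_usec = (10^9 * 2^32) / 10^6 = 1000 * 2^32, so a
 * 7 usec delay becomes (lapic_frac_cycle_per_usec * 7) >> 32 = 7000
 * cycles, with no division at delay time.
 */
#if 0
static u_int32_t
example_usec_to_cycles(int usec)
{
	/* same formula lapic_delay() uses for usec > 25 */
	return (lapic_frac_cycle_per_usec * (u_int64_t)usec) >> 32;
}
#endif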
      
      /*
       * delay for N usec.
       */
      
      void
      lapic_delay(int usec)
{
              int32_t tick, otick;
	int64_t deltat;
      
              otick = lapic_gettick();
      
              if (usec <= 0)
                      return;
              if (usec <= 25)
		deltat = lapic_delaytab[usec];
              else
                      deltat = (lapic_frac_cycle_per_usec * usec) >> 32;
      
	while (deltat > 0) {
		tick = lapic_gettick();
		if (tick > otick)
			deltat -= lapic_tval - (tick - otick);
		else
			deltat -= otick - tick;
                      otick = tick;
      
                      CPU_BUSY_CYCLE();
              }
      }
      
      /*
       * XXX the following belong mostly or partly elsewhere..
       */
      
      #ifdef MULTIPROCESSOR
      static __inline void i82489_icr_wait(void);
      
      static __inline void
      i82489_icr_wait(void)
      {
      #ifdef DIAGNOSTIC
              unsigned j = 100000;
      #endif /* DIAGNOSTIC */
      
              while ((i82489_readreg(LAPIC_ICRLO) & LAPIC_DLSTAT_BUSY) != 0) {
                      __asm volatile("pause": : :"memory");
      #ifdef DIAGNOSTIC
                      j--;
                      if (j == 0)
                              panic("i82489_icr_wait: busy");
      #endif /* DIAGNOSTIC */
              }
      }
      
      void
      i82489_ipi_init(int target)
      {
      
              if ((target & LAPIC_DEST_MASK) == 0)
                      i82489_writereg(LAPIC_ICRHI, target << LAPIC_ID_SHIFT);
      
              i82489_writereg(LAPIC_ICRLO, (target & LAPIC_DEST_MASK) |
                  LAPIC_DLMODE_INIT | LAPIC_LVL_ASSERT );
      
              i82489_icr_wait();
      
              i8254_delay(10000);
      
              i82489_writereg(LAPIC_ICRLO, (target & LAPIC_DEST_MASK) |
                   LAPIC_DLMODE_INIT | LAPIC_LVL_TRIG | LAPIC_LVL_DEASSERT);
      
              i82489_icr_wait();
      }
      
      void
      i82489_ipi(int vec, int target, int dl)
      {
              int s;
      
              s = splhigh();
      
              i82489_icr_wait();
      
              if ((target & LAPIC_DEST_MASK) == 0)
                      i82489_writereg(LAPIC_ICRHI, target << LAPIC_ID_SHIFT);
      
              i82489_writereg(LAPIC_ICRLO,
                  (target & LAPIC_DEST_MASK) | vec | dl | LAPIC_LVL_ASSERT);
      
              i82489_icr_wait();
      
              splx(s);
      }
      
      void
      x2apic_ipi_init(int target)
      {
              u_int64_t hi = 0;
      
              if ((target & LAPIC_DEST_MASK) == 0)
                      hi = target & 0xff;
      
              x2apic_writeicr(hi, (target & LAPIC_DEST_MASK) | LAPIC_DLMODE_INIT |
                  LAPIC_LVL_ASSERT );
      
              i8254_delay(10000);
      
              x2apic_writeicr(0, (target & LAPIC_DEST_MASK) | LAPIC_DLMODE_INIT |
                  LAPIC_LVL_TRIG | LAPIC_LVL_DEASSERT);
      }
      
      void
      x2apic_ipi(int vec, int target, int dl)
{
              u_int64_t hi = 0, lo;
      
              if ((target & LAPIC_DEST_MASK) == 0)
                      hi = target & 0xff;
      
              lo = (target & LAPIC_DEST_MASK) | vec | dl | LAPIC_LVL_ASSERT;
      
              x2apic_writeicr(hi, lo);
      }
      
      void
      x86_ipi_init(int target)
      {
              if (x2apic_enabled)
                      x2apic_ipi_init(target);
              else
                      i82489_ipi_init(target);
      }
      #endif /* MULTIPROCESSOR */
      
      
      /*
       * Using 'pin numbers' as:
       * 0 - timer
       * 1 - unused
       * 2 - PCINT
       * 3 - LVINT0
       * 4 - LVINT1
       * 5 - LVERR
       */
      
      void
      lapic_hwmask(struct pic *pic, int pin)
      {
              int reg;
              u_int32_t val;
      
              reg = LAPIC_LVTT + (pin << 4);
              val = lapic_readreg(reg);
              val |= LAPIC_LVT_MASKED;
              lapic_writereg(reg, val);
      }
      
      void
      lapic_hwunmask(struct pic *pic, int pin)
      {
              int reg;
              u_int32_t val;
      
              reg = LAPIC_LVTT + (pin << 4);
              val = lapic_readreg(reg);
              val &= ~LAPIC_LVT_MASKED;
              lapic_writereg(reg, val);
      }
      
      void
      lapic_setup(struct pic *pic, struct cpu_info *ci, int pin, int idtvec, int type)
      {
      }
      /*        $OpenBSD: uvm_pmemrange.h,v 1.14 2016/09/16 02:47:09 dlg Exp $        */
      
      /*
       * Copyright (c) 2009 Ariane van der Steldt <ariane@stack.nl>
       *
       * Permission to use, copy, modify, and distribute this software for any
       * purpose with or without fee is hereby granted, provided that the above
       * copyright notice and this permission notice appear in all copies.
       *
       * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
       * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
       * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
       * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
       * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
       * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
       * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
       */
      
      /*
       * uvm_pmemrange.h: describe and manage free physical memory.
       */
      
      #ifndef _UVM_UVM_PMEMRANGE_H_
      #define _UVM_UVM_PMEMRANGE_H_
      
      RBT_HEAD(uvm_pmr_addr, vm_page);
      RBT_HEAD(uvm_pmr_size, vm_page);
      
      /*
       * Page types available:
       * - DIRTY: this page may contain random data.
       * - ZERO: this page has been zeroed.
       */
      #define UVM_PMR_MEMTYPE_DIRTY        0
      #define UVM_PMR_MEMTYPE_ZERO        1
      #define UVM_PMR_MEMTYPE_MAX        2
      
      /*
       * An address range of memory.
       */
      struct uvm_pmemrange {
              struct        uvm_pmr_addr addr;        /* Free page chunks, sorted by addr. */
              struct        uvm_pmr_size size[UVM_PMR_MEMTYPE_MAX];
                                              /* Free page chunks, sorted by size. */
              TAILQ_HEAD(, vm_page) single[UVM_PMR_MEMTYPE_MAX];
                                              /* single page regions (uses pageq) */
      
              paddr_t        low;                        /* Start of address range (pgno). */
              paddr_t        high;                        /* End +1 (pgno). */
              int        use;                        /* Use counter. */
              psize_t        nsegs;                        /* Current range count. */
      
              TAILQ_ENTRY(uvm_pmemrange) pmr_use;
                                              /* pmr, sorted by use */
              RBT_ENTRY(uvm_pmemrange) pmr_addr;
                                              /* pmr, sorted by address */
      };
      
      /*
       * Description of failing memory allocation.
       *
       * Two ways new pages can become available:
       * [1] page daemon drops them (we notice because they are freed)
       * [2] a process calls free
       *
       * The buffer cache and page daemon can decide that they don't have the
       * ability to make pages available in the requested range. In that case,
       * the FAIL bit will be set.
       * XXX There's a possibility that a page is no longer on the queues but
       * XXX has not yet been freed, or that a page was busy.
       * XXX Also, wired pages are not considered for paging, so they could
       * XXX cause a failure that may be recoverable.
       */
      struct uvm_pmalloc {
              TAILQ_ENTRY(uvm_pmalloc) pmq;
      
              /*
               * Allocation request parameters.
               */
              struct uvm_constraint_range pm_constraint;
              psize_t        pm_size;
      
              /*
               * State flags.
               */
              int        pm_flags;
      };
      
      /*
       * uvm_pmalloc flags.
       */
      #define UVM_PMA_LINKED        0x01        /* uvm_pmalloc is on list */
      #define UVM_PMA_BUSY        0x02        /* entry is busy with fpageq unlocked */
      #define UVM_PMA_FAIL        0x10        /* page daemon cannot free pages */
      #define UVM_PMA_FREED        0x20        /* at least one page in the range was freed */
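
/*
 * Illustrative sketch (not part of the original source): how a waiter
 * might interpret a completed uvm_pmalloc.  The page daemon sets
 * UVM_PMA_FREED when it frees something in the requested range and
 * UVM_PMA_FAIL when it gives up; LINKED/BUSY track queue membership.
 * The function and return values are hypothetical.
 */
#if 0
static int
example_pma_result(const struct uvm_pmalloc *pma)
{
	if (pma->pm_flags & UVM_PMA_FAIL)
		return (ENOMEM);	/* range cannot be satisfied */
	if (pma->pm_flags & UVM_PMA_FREED)
		return (EAGAIN);	/* pages appeared; retry allocation */
	return (EBUSY);			/* still pending */
}
#endif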
      
      RBT_HEAD(uvm_pmemrange_addr, uvm_pmemrange);
      TAILQ_HEAD(uvm_pmemrange_use, uvm_pmemrange);
      
      /*
       * pmr control structure. Contained in uvm.pmr_control.
       */
      struct uvm_pmr_control {
              struct        uvm_pmemrange_addr addr;
              struct        uvm_pmemrange_use use;
      
              /* Only changed while fpageq is locked. */
              TAILQ_HEAD(, uvm_pmalloc) allocs;
      };
      
      void        uvm_pmr_freepages(struct vm_page *, psize_t);
      void        uvm_pmr_freepageq(struct pglist *);
      int        uvm_pmr_getpages(psize_t, paddr_t, paddr_t, paddr_t, paddr_t,
                  int, int, struct pglist *);
      void        uvm_pmr_init(void);
      int        uvm_wait_pla(paddr_t, paddr_t, paddr_t, int);
      void        uvm_wakeup_pla(paddr_t, psize_t);
      
      #if defined(DDB) || defined(DEBUG)
      int        uvm_pmr_isfree(struct vm_page *pg);
      #endif
      
      /*
       * Internal tree logic.
       */
      
      int        uvm_pmr_addr_cmp(const struct vm_page *, const struct vm_page *);
      int        uvm_pmr_size_cmp(const struct vm_page *, const struct vm_page *);
      
RBT_PROTOTYPE(uvm_pmr_addr, vm_page, objt, uvm_pmr_addr_cmp);
      RBT_PROTOTYPE(uvm_pmr_size, vm_page, objt, uvm_pmr_size_cmp);
      RBT_PROTOTYPE(uvm_pmemrange_addr, uvm_pmemrange, pmr_addr,
          uvm_pmemrange_addr_cmp);
      
      struct vm_page                *uvm_pmr_insert_addr(struct uvm_pmemrange *,
                                  struct vm_page *, int);
      void                         uvm_pmr_insert_size(struct uvm_pmemrange *,
                                  struct vm_page *);
      struct vm_page                *uvm_pmr_insert(struct uvm_pmemrange *,
                                  struct vm_page *, int);
      void                         uvm_pmr_remove_addr(struct uvm_pmemrange *,
                                  struct vm_page *);
      void                         uvm_pmr_remove_size(struct uvm_pmemrange *,
                                  struct vm_page *);
      void                         uvm_pmr_remove(struct uvm_pmemrange *,
                                  struct vm_page *);
      struct vm_page                *uvm_pmr_extract_range(struct uvm_pmemrange *,
                                  struct vm_page *, paddr_t, paddr_t,
                                  struct pglist *);
      
      #endif /* _UVM_UVM_PMEMRANGE_H_ */
      /*        $OpenBSD: tty_subr.c,v 1.33 2016/03/14 23:08:06 krw Exp $        */
      /*        $NetBSD: tty_subr.c,v 1.13 1996/02/09 19:00:43 christos Exp $        */
      
      /*
       * Copyright (c) 1993, 1994 Theo de Raadt
       * All rights reserved.
       *
       * Per Lindqvist <pgd@compuram.bbt.se> supplied an almost fully working
       * set of true clist functions that this is very loosely based on.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       *
       * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
       * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
       * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
       * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
       * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
       * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
       * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
       * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
       * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
       * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
       */
      
      #include <sys/param.h>
      #include <sys/systm.h>
      #include <sys/ioctl.h>
      #include <sys/tty.h>
      #include <sys/malloc.h>
      
      /*
       * If TTY_QUOTE functionality isn't required by a line discipline,
       * it can free c_cq and set it to NULL. This speeds things up,
       * and also does not use any extra memory. This is useful for (say)
       * a SLIP line discipline that wants a 32K ring buffer for data
       * but doesn't need quoting.
       */
      #define QMEM(n)                ((((n)-1)/NBBY)+1)
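
/*
 * Illustrative note (not part of the original source): QMEM(n) is a
 * ceiling division by NBBY (8), giving the bytes needed for one quote
 * bit per buffer character.  QMEM(1024) == 128, and the 32K SLIP ring
 * mentioned above would need QMEM(32768) == 4096 bytes if it kept
 * quoting enabled.
 */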
      
      void        clrbits(u_char *, int, int);
      
      /*
       * Initialize a particular clist. Ok, they are really ring buffers,
       * of the specified length, with/without quoting support.
       */
      void
      clalloc(struct clist *clp, int size, int quot)
      {
      
              clp->c_cs = malloc(size, M_TTYS, M_WAITOK|M_ZERO);
      
              if (quot)
                      clp->c_cq = malloc(QMEM(size), M_TTYS, M_WAITOK|M_ZERO);
              else
                      clp->c_cq = NULL;
      
              clp->c_cf = clp->c_cl = NULL;
              clp->c_ce = clp->c_cs + size;
              clp->c_cn = size;
              clp->c_cc = 0;
      }
      
      void
      clfree(struct clist *clp)
      {
              if (clp->c_cs) {
                      explicit_bzero(clp->c_cs, clp->c_cn);
                      free(clp->c_cs, M_TTYS, clp->c_cn);
              }
              if (clp->c_cq) {
                      explicit_bzero(clp->c_cq, QMEM(clp->c_cn));
                      free(clp->c_cq, M_TTYS, QMEM(clp->c_cn));
              }
              clp->c_cs = clp->c_cq = NULL;
      }
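
/*
 * Illustrative sketch (not part of the original source): the clist
 * lifecycle.  A line discipline allocates the ring once, moves bytes
 * through it with putc()/getc(), and releases it with clfree(), which
 * also zeroes the storage.
 */
#if 0
static void
example_clist_lifecycle(void)
{
	struct clist cl;
	int c;

	clalloc(&cl, 1024, 1);		/* 1 KB ring with quote bits */
	putc('a' | TTY_QUOTE, &cl);	/* store a quoted character */
	c = getc(&cl);			/* c == ('a' | TTY_QUOTE) */
	clfree(&cl);
}
#endif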
      
      
      /*
       * Get a character from a clist.
       */
      int
      getc(struct clist *clp)
{
              int c = -1;
              int s;
      
              s = spltty();
	if (clp->c_cc == 0)
		goto out;

	c = *clp->c_cf & 0xff;
	*clp->c_cf = 0;
	if (clp->c_cq) {
		if (isset(clp->c_cq, clp->c_cf - clp->c_cs))
			c |= TTY_QUOTE;
		clrbit(clp->c_cq, clp->c_cf - clp->c_cs);
	}
	if (++clp->c_cf == clp->c_ce)
		clp->c_cf = clp->c_cs;
	if (--clp->c_cc == 0)
		clp->c_cf = clp->c_cl = NULL;
      out:
              splx(s);
              return c;
      }
      
      /*
       * Copy clist to buffer.
       * Return number of bytes moved.
       */
      int
      q_to_b(struct clist *clp, u_char *cp, int count)
{
              int cc;
              u_char *p = cp;
              int s;
      
              s = spltty();
              /* optimize this while loop */
	while (count > 0 && clp->c_cc > 0) {
		cc = clp->c_cl - clp->c_cf;
		if (clp->c_cf >= clp->c_cl)
			cc = clp->c_ce - clp->c_cf;
		if (cc > count)
			cc = count;
		memcpy(p, clp->c_cf, cc);
		memset(clp->c_cf, 0, cc);
		if (clp->c_cq)
			clrbits(clp->c_cq, clp->c_cf - clp->c_cs, cc);
		count -= cc;
		p += cc;
		clp->c_cc -= cc;
		clp->c_cf += cc;
		if (clp->c_cf == clp->c_ce)
			clp->c_cf = clp->c_cs;
	}
	if (clp->c_cc == 0)
		clp->c_cf = clp->c_cl = NULL;
              splx(s);
              return p - cp;
      }
      
      /*
       * Return count of contiguous characters in clist.
       * Stop counting if flag&character is non-null.
       */
      int
      ndqb(struct clist *clp, int flag)
      {
              int count = 0;
              int i;
              int cc;
              int s;
      
              s = spltty();
              if ((cc = clp->c_cc) == 0)
                      goto out;
      
              if (flag == 0) {
                      count = clp->c_cl - clp->c_cf;
                      if (count <= 0)
                              count = clp->c_ce - clp->c_cf;
                      goto out;
              }
      
              i = clp->c_cf - clp->c_cs;
              if (flag & TTY_QUOTE) {
                      while (cc-- > 0 && !(clp->c_cs[i++] & (flag & ~TTY_QUOTE) ||
                          isset(clp->c_cq, i))) {
                              count++;
                              if (i == clp->c_cn)
                                      break;
                      }
              } else {
                      while (cc-- > 0 && !(clp->c_cs[i++] & flag)) {
                              count++;
                              if (i == clp->c_cn)
                                      break;
                      }
              }
      out:
              splx(s);
              return count;
      }
      
      /*
       * Flush count bytes from clist.
       */
      void
      ndflush(struct clist *clp, int count)
{
              int cc;
              int s;
      
              s = spltty();
              if (count == clp->c_cc) {
		clp->c_cc = 0;
                      clp->c_cf = clp->c_cl = NULL;
                      goto out;
              }
              /* optimize this while loop */
              while (count > 0 && clp->c_cc > 0) {
                      cc = clp->c_cl - clp->c_cf;
                      if (clp->c_cf >= clp->c_cl)
                              cc = clp->c_ce - clp->c_cf;
                      if (cc > count)
                              cc = count;
                      count -= cc;
                      clp->c_cc -= cc;
                      clp->c_cf += cc;
                      if (clp->c_cf == clp->c_ce)
                              clp->c_cf = clp->c_cs;
              }
              if (clp->c_cc == 0)
                      clp->c_cf = clp->c_cl = NULL;
      out:
              splx(s);
      }
      
      /*
       * Put a character into the output queue.
       */
      int
      putc(int c, struct clist *clp)
{
              int i;
              int s;
      
              s = spltty();
              if (clp->c_cc == clp->c_cn) {
		splx(s);
                      return -1;
              }
      
              if (clp->c_cc == 0) {
                      if (!clp->c_cs) {
      #if defined(DIAGNOSTIC)
                              printf("putc: required clalloc\n");
      #endif
                              clalloc(clp, 1024, 1);
                      }
		clp->c_cf = clp->c_cl = clp->c_cs;
              }
      
              *clp->c_cl = c & 0xff;
              i = clp->c_cl - clp->c_cs;
	if (clp->c_cq) {
		if (c & TTY_QUOTE)
			setbit(clp->c_cq, i);
		else
			clrbit(clp->c_cq, i);
	}
	clp->c_cc++;
	clp->c_cl++;
	if (clp->c_cl == clp->c_ce)
		clp->c_cl = clp->c_cs;
              splx(s);
              return 0;
      }
      
      /*
       * optimized version of
       *
       * for (i = 0; i < len; i++)
       *        clrbit(cp, off + i);
       */
      void
      clrbits(u_char *cp, int off, int len)
      {
              int sby, sbi, eby, ebi;
              int i;
	u_char mask;

	if (len == 1) {
		clrbit(cp, off);
                      return;
              }
      
              sby = off / NBBY;
              sbi = off % NBBY;
              eby = (off+len) / NBBY;
              ebi = (off+len) % NBBY;
              if (sby == eby) {
		mask = ((1 << (ebi - sbi)) - 1) << sbi;
                      cp[sby] &= ~mask;
              } else {
                      mask = (1<<sbi) - 1;
                      cp[sby++] &= mask;
      
		for (i = sby; i < eby; i++)
			cp[i] = 0x00;

		mask = (1<<ebi) - 1;
		if (mask)	/* if no mask, eby may be 1 too far */
                              cp[eby] &= ~mask;
      
              }
      }
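
/*
 * Illustrative note (not part of the original source): a worked case of
 * the mask arithmetic above.  clrbits(cp, 3, 10) has sby=0, sbi=3,
 * eby=1, ebi=5: byte 0 keeps only its low three bits (mask 0x07), no
 * whole bytes are zeroed, and byte 1 clears its low five bits (~0x1f),
 * covering exactly bit positions 3 through 12.
 */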
      
      /*
       * Copy buffer to clist.
       * Return number of bytes not transferred.
       */
      int
      b_to_q(u_char *cp, int count, struct clist *clp)
{
              int cc;
              u_char *p = cp;
              int s;
      
              if (count <= 0)
                      return 0;
      
              s = spltty();
	if (clp->c_cc == clp->c_cn)
		goto out;

	if (clp->c_cc == 0) {
                      if (!clp->c_cs) {
      #if defined(DIAGNOSTIC)
                              printf("b_to_q: required clalloc\n");
      #endif
                              clalloc(clp, 1024, 1);
                      }
		clp->c_cf = clp->c_cl = clp->c_cs;
              }
      
              /* optimize this while loop */
	while (count > 0 && clp->c_cc < clp->c_cn) {
                      cc = clp->c_ce - clp->c_cl;
                      if (clp->c_cf > clp->c_cl)
                              cc = clp->c_cf - clp->c_cl;
                      if (cc > count)
                              cc = count;
                      memcpy(clp->c_cl, p, cc);
		if (clp->c_cq)
			clrbits(clp->c_cq, clp->c_cl - clp->c_cs, cc);
		p += cc;
		count -= cc;
		clp->c_cc += cc;
		clp->c_cl += cc;
		if (clp->c_cl == clp->c_ce)
			clp->c_cl = clp->c_cs;
              }
      out:
              splx(s);
              return count;
      }
      
      static int cc;
      
/*
 * Given a non-NULL pointer into the clist, return the pointer
 * to the next character in the list, or NULL if there are no more chars.
 *
 * Callers must not allow getc's to happen between firstc's and nextc's
 * so that the pointer remains valid.  Note that interrupts are NOT
 * masked.
 */
      u_char *
      nextc(struct clist *clp, u_char *cp, int *c)
{
      
              if (clp->c_cf == cp) {
                      /*
                       * First time initialization.
                       */
		cc = clp->c_cc;
              }
	if (cc == 0 || cp == NULL)
		return NULL;
	if (--cc == 0)
		return NULL;
	if (++cp == clp->c_ce)
		cp = clp->c_cs;
	*c = *cp & 0xff;
	if (clp->c_cq) {
		if (isset(clp->c_cq, cp - clp->c_cs))
			*c |= TTY_QUOTE;
              }
              return cp;
      }
      
/*
 * Return the pointer to the first character in the list, or NULL if the
 * list is empty.
 *
 * Callers must not allow getc's to happen between firstc's and nextc's
 * so that the pointer remains valid.  Note that interrupts are NOT
 * masked.
 *
 * *c is set to the first character.
 */
      u_char *
      firstc(struct clist *clp, int *c)
{
              u_char *cp;
      
              cc = clp->c_cc;
	if (cc == 0)
                      return NULL;
              cp = clp->c_cf;
              *c = *cp & 0xff;
              if (clp->c_cq) {
		if (isset(clp->c_cq, cp - clp->c_cs))
			*c |= TTY_QUOTE;
              }
              return clp->c_cf;
      }
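
/*
 * Illustrative sketch (not part of the original source): scanning a
 * clist without consuming it, using the firstc()/nextc() pair under the
 * no-intervening-getc rule documented above.
 */
#if 0
static int
example_scan(struct clist *clp)
{
	u_char *cp;
	int c, n = 0;

	for (cp = firstc(clp, &c); cp != NULL; cp = nextc(clp, cp, &c))
		n++;			/* c holds each character in turn */
	return (n);
}
#endif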
      
      /*
       * Remove the last character in the clist and return it.
       */
      int
      unputc(struct clist *clp)
{
              unsigned int c = -1;
              int s;
      
              s = spltty();
	if (clp->c_cc == 0)
                      goto out;
      
	if (clp->c_cl == clp->c_cs)
		clp->c_cl = clp->c_ce - 1;
              else
                      --clp->c_cl;
              clp->c_cc--;
      
              c = *clp->c_cl & 0xff;
              *clp->c_cl = 0;
              if (clp->c_cq) {
		if (isset(clp->c_cq, clp->c_cl - clp->c_cs))
			c |= TTY_QUOTE;
		clrbit(clp->c_cq, clp->c_cl - clp->c_cs);
	}
	if (clp->c_cc == 0)
		clp->c_cf = clp->c_cl = NULL;
      out:
              splx(s);
              return c;
      }
      
      /*
       * Put the chars in the from queue on the end of the to queue.
       */
      void
      catq(struct clist *from, struct clist *to)
{
              int c;
              int s;
      
              s = spltty();
              if (from->c_cc == 0) {        /* nothing to move */
		splx(s);
                      return;
              }
      
              /*
               * if `to' queue is empty and the queues are the same max size,
               * it is more efficient to just swap the clist structures.
               */
	if (to->c_cc == 0 && from->c_cn == to->c_cn) {
		struct clist tmp;
      
                      tmp = *from;
                      *from = *to;
                      *to = tmp;
                      splx(s);
                      return;
              }
              splx(s);
      
	while ((c = getc(from)) != -1)
                      putc(c, to);
      }
      /*        $OpenBSD: ffs_balloc.c,v 1.45 2019/07/19 00:24:31 cheloha Exp $        */
      /*        $NetBSD: ffs_balloc.c,v 1.3 1996/02/09 22:22:21 christos Exp $        */
      
      /*
       * Copyright (c) 2002 Networks Associates Technology, Inc.
       * All rights reserved.
       *
       * This software was developed for the FreeBSD Project by Marshall
       * Kirk McKusick and Network Associates Laboratories, the Security
       * Research Division of Network Associates, Inc. under DARPA/SPAWAR
       * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
       * research program.
       *
       * Copyright (c) 1982, 1986, 1989, 1993
       *        The Regents of the University of California.  All rights reserved.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       * 3. Neither the name of the University nor the names of its contributors
       *    may be used to endorse or promote products derived from this software
       *    without specific prior written permission.
       *
       * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
       * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
       * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
       * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
       * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
       * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
       * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
       * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
       * SUCH DAMAGE.
       *
       *        @(#)ffs_balloc.c        8.4 (Berkeley) 9/23/93
       */
      
      #include <sys/param.h>
      #include <sys/systm.h>
      #include <sys/buf.h>
      #include <sys/proc.h>
      #include <sys/mount.h>
      #include <sys/vnode.h>
      
      #include <ufs/ufs/quota.h>
      #include <ufs/ufs/inode.h>
      #include <ufs/ufs/ufsmount.h>
      #include <ufs/ufs/ufs_extern.h>
      
      #include <ufs/ffs/fs.h>
      #include <ufs/ffs/ffs_extern.h>
      
      int ffs1_balloc(struct inode *, off_t, int, struct ucred *, int, struct buf **);
      #ifdef FFS2
      int ffs2_balloc(struct inode *, off_t, int, struct ucred *, int, struct buf **);
      #endif
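
/*
 * Editor's illustration (not part of the original source): the
 * logical-block arithmetic the allocators below start with.  lblkno()
 * and blkoff() reduce to a shift and a mask; the 16K block size here
 * is hypothetical, chosen only to make the numbers concrete.
 */
#include <stdio.h>

#define EX_BSHIFT	14			/* log2(16384) */
#define EX_BMASK	((1LL << EX_BSHIFT) - 1)

int
main(void)
{
	long long off = 100000;			/* byte offset in the file */
	long long lbn = off >> EX_BSHIFT;	/* lblkno(): block 6 */
	long long boff = off & EX_BMASK;	/* blkoff(): byte 1696 */

	printf("lbn=%lld offset-in-block=%lld\n", lbn, boff);
	return 0;
}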
      
      /*
       * Balloc defines the structure of file system storage
       * by allocating the physical blocks on a device given
       * the inode and the logical block number in a file.
       */
      int
      ffs1_balloc(struct inode *ip, off_t startoffset, int size, struct ucred *cred,
          int flags, struct buf **bpp)
{
	daddr_t lbn, nb, newb, pref;
	struct fs *fs;
	struct buf *bp, *nbp;
              struct vnode *vp;
              struct proc *p;
              struct indir indirs[NIADDR + 2];
              int32_t *bap;
              int deallocated, osize, nsize, num, i, error;
              int32_t *allocib, *blkp, *allocblk, allociblk[NIADDR+1];
              int unwindidx = -1;
      
              vp = ITOV(ip);
              fs = ip->i_fs;
              p = curproc;
              lbn = lblkno(fs, startoffset);
              size = blkoff(fs, startoffset) + size;
              if (size > fs->fs_bsize)
                      panic("ffs1_balloc: blk too big");
              if (bpp != NULL)
		*bpp = NULL;
              if (lbn < 0)
                      return (EFBIG);
      
              /*
               * If the next write will extend the file into a new block,
               * and the file is currently composed of a fragment
               * this fragment has to be extended to be a full block.
               */
              nb = lblkno(fs, ip->i_ffs1_size);
	if (nb < NDADDR && nb < lbn) {
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			error = ffs_realloccg(ip, nb,
			    ffs1_blkpref(ip, nb, (int)nb, &ip->i_ffs1_db[0]),
			    osize, (int)fs->fs_bsize, cred, bpp, &newb);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
                                      softdep_setup_allocdirect(ip, nb, newb,
                                          ip->i_ffs1_db[nb], fs->fs_bsize, osize,
                                          bpp ? *bpp : NULL);
      
                              ip->i_ffs1_size = lblktosize(fs, nb + 1);
                              uvm_vnp_setsize(vp, ip->i_ffs1_size);
                              ip->i_ffs1_db[nb] = newb;
                              ip->i_flag |= IN_CHANGE | IN_UPDATE;
                              if (bpp != NULL) {
                                      if (flags & B_SYNC)
					bwrite(*bpp);
				else
					bawrite(*bpp);
                              }
                      }
              }
              /*
               * The first NDADDR blocks are direct blocks
               */
              if (lbn < NDADDR) {
                      nb = ip->i_ffs1_db[lbn];
                      if (nb != 0 && ip->i_ffs1_size >= lblktosize(fs, lbn + 1)) {
                              /*
                               * The block is an already-allocated direct block
                               * and the file already extends past this block,
                               * thus this must be a whole block.
                               * Just read the block (if requested).
                               */
      
                              if (bpp != NULL) {
                                      error = bread(vp, lbn, fs->fs_bsize, bpp);
				if (error) {
                                              brelse(*bpp);
                                              return (error);
                                      }
                              }
                              return (0);
                      }
                      if (nb != 0) {
                              /*
                               * Consider need to reallocate a fragment.
                               */
                              osize = fragroundup(fs, blkoff(fs, ip->i_ffs1_size));
                              nsize = fragroundup(fs, size);
                              if (nsize <= osize) {
                                      /*
                                       * The existing block is already
                                       * at least as big as we want.
                                       * Just read the block (if requested).
                                       */
                                      if (bpp != NULL) {
                                              error = bread(vp, lbn, fs->fs_bsize,
                                                  bpp);
                                              if (error) {
                                                      brelse(*bpp);
                                                      return (error);
                                              }
					buf_adjcnt((*bpp), osize);
                                      }
                                      return (0);
                              } else {
                                      /*
                                       * The existing block is smaller than we
                                       * want, grow it.
                                       */
                                      error = ffs_realloccg(ip, lbn,
                                          ffs1_blkpref(ip, lbn, (int)lbn,
                                              &ip->i_ffs1_db[0]),
                                          osize, nsize, cred, bpp, &newb);
                                      if (error)
                                              return (error);
				if (DOINGSOFTDEP(vp))
                                              softdep_setup_allocdirect(ip, lbn,
                                                  newb, nb, nsize, osize,
                                                  bpp ? *bpp : NULL);
                              }
                      } else {
                              /*
                               * The block was not previously allocated,
                               * allocate a new block or fragment.
                               */
      
                              if (ip->i_ffs1_size < lblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			error = ffs_alloc(ip, lbn,
			    ffs1_blkpref(ip, lbn, (int)lbn, &ip->i_ffs1_db[0]),
			    nsize, cred, &newb);
			if (error)
				return (error);
			if (bpp != NULL) {
				*bpp = getblk(vp, lbn, fs->fs_bsize, 0, INFSLP);
				if (nsize < fs->fs_bsize)
					(*bpp)->b_bcount = nsize;
				(*bpp)->b_blkno = fsbtodb(fs, newb);
				if (flags & B_CLRBUF)
					clrbuf(*bpp);
			}
			if (DOINGSOFTDEP(vp))
                                      softdep_setup_allocdirect(ip, lbn, newb, 0,
                                          nsize, 0, bpp ? *bpp : NULL);
                      }
                      ip->i_ffs1_db[lbn] = newb;
                      ip->i_flag |= IN_CHANGE | IN_UPDATE;
                      return (0);
              }
      
              /*
               * Determine the number of levels of indirection.
               */
              pref = 0;
              if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
                      return(error);
      #ifdef DIAGNOSTIC
              if (num < 1)
		panic("ffs1_balloc: ufs_getlbns returned indirect block");
      #endif
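	/*
	 * Editor's note (illustrative): with FFS1's 32-bit block
	 * pointers (bap below is cast to int32_t *), one indirect
	 * block of a hypothetical 16K filesystem holds 16384/4 == 4096
	 * pointers.  num is then 1 for the first 4096 logical blocks
	 * past NDADDR, 2 for the next 4096*4096, and 3 beyond that.
	 */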
              /*
               * Fetch the first indirect block allocating if necessary.
               */
              --num;
              nb = ip->i_ffs1_ib[indirs[0].in_off];
      
              allocib = NULL;
              allocblk = allociblk;
              if (nb == 0) {
                      pref = ffs1_blkpref(ip, lbn, -indirs[0].in_off - 1, NULL);
                      error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
                                        cred, &newb);
                      if (error)
                              goto fail;
                      nb = newb;
      
                      *allocblk++ = nb;
                      bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, INFSLP);
                      bp->b_blkno = fsbtodb(fs, nb);
                      clrbuf(bp);
      
                      if (DOINGSOFTDEP(vp)) {
                              softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
                                  newb, 0, fs->fs_bsize, 0, bp);
                              bdwrite(bp);
                      } else {
                              /*
                               * Write synchronously so that indirect blocks
                               * never point at garbage.
                               */
			if ((error = bwrite(bp)) != 0)
                                      goto fail;
                      }
                      allocib = &ip->i_ffs1_ib[indirs[0].in_off];
                      *allocib = nb;
                      ip->i_flag |= IN_CHANGE | IN_UPDATE;
              }
      
              /*
               * Fetch through the indirect blocks, allocating as necessary.
               */
              for (i = 1;;) {
		error = bread(vp, indirs[i].in_lbn, (int)fs->fs_bsize, &bp);
		if (error) {
			brelse(bp);
			goto fail;
		}
		bap = (int32_t *)bp->b_data;
		nb = bap[indirs[i].in_off];
		if (i == num)
			break;
		i++;
		if (nb != 0) {
			brelse(bp);
			continue;
		}
		if (pref == 0)
			pref = ffs1_blkpref(ip, lbn, i - num - 1, NULL);
                      error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
                                        &newb);
                      if (error) {
                              brelse(bp);
                              goto fail;
                      }
                      nb = newb;
                      *allocblk++ = nb;
                      nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, INFSLP);
                      nbp->b_blkno = fsbtodb(fs, nb);
                      clrbuf(nbp);
      
                      if (DOINGSOFTDEP(vp)) {
                              softdep_setup_allocindir_meta(nbp, ip, bp,
                                  indirs[i - 1].in_off, nb);
                              bdwrite(nbp);
                      } else {
                              /*
                               * Write synchronously so that indirect blocks
                               * never point at garbage.
                               */
			if ((error = bwrite(nbp)) != 0) {
                                      brelse(bp);
                                      goto fail;
                              }
                      }
                      bap[indirs[i - 1].in_off] = nb;
                      if (allocib == NULL && unwindidx < 0)
                              unwindidx = i - 1;
                      /*
                       * If required, write synchronously, otherwise use
                       * delayed write.
                       */
                      if (flags & B_SYNC) {
                              bwrite(bp);
                      } else {
			bdwrite(bp);
                      }
              }
              /*
               * Get the data block, allocating if necessary.
               */
              if (nb == 0) {
                      pref = ffs1_blkpref(ip, lbn, indirs[i].in_off, &bap[0]);
                      error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
                                        &newb);
                      if (error) {
                              brelse(bp);
                              goto fail;
                      }
                      nb = newb;
                      *allocblk++ = nb;
                      if (bpp != NULL) {
                              nbp = getblk(vp, lbn, fs->fs_bsize, 0, INFSLP);
                              nbp->b_blkno = fsbtodb(fs, nb);
			if (flags & B_CLRBUF)
				clrbuf(nbp);
			*bpp = nbp;
		}
		if (DOINGSOFTDEP(vp))
                              softdep_setup_allocindir_page(ip, lbn, bp,
                                  indirs[i].in_off, nb, 0, bpp ? *bpp : NULL);
                      bap[indirs[i].in_off] = nb;
                      /*
                       * If required, write synchronously, otherwise use
                       * delayed write.
                       */
                      if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
                      }
                      return (0);
              }
              brelse(bp);
              if (bpp != NULL) {
                      if (flags & B_CLRBUF) {
                              error = bread(vp, lbn, (int)fs->fs_bsize, &nbp);
                              if (error) {
                                      brelse(nbp);
                                      goto fail;
                              }
                      } else {
			nbp = getblk(vp, lbn, fs->fs_bsize, 0, INFSLP);
                              nbp->b_blkno = fsbtodb(fs, nb);
                      }
                      *bpp = nbp;
              }
              return (0);
      
      fail:
              /*
               * If we have failed to allocate any blocks, simply return the error.
               * This is the usual case and avoids the need to fsync the file.
               */
              if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
                      return (error);
              /*
               * If we have failed part way through block allocation, we have to
               * deallocate any indirect blocks that we have allocated. We have to
               * fsync the file before we start to get rid of all of its
               * dependencies so that we do not leave them dangling. We have to sync
               * it at the end so that the softdep code does not find any untracked
               * changes. Although this is really slow, running out of disk space is
               * not expected to be a common occurrence. The error return from fsync
               * is ignored as we already have an error to return to the user.
               */
              VOP_FSYNC(vp, p->p_ucred, MNT_WAIT, p);
              for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
                      ffs_blkfree(ip, *blkp, fs->fs_bsize);
                      deallocated += fs->fs_bsize;
              }
              if (allocib != NULL) {
                      *allocib = 0;
              } else if (unwindidx >= 0) {
                      int r;
      
                      r = bread(vp, indirs[unwindidx].in_lbn, (int)fs->fs_bsize, &bp);
                      if (r)
                              panic("Could not unwind indirect block, error %d", r);
                      bap = (int32_t *)bp->b_data;
                      bap[indirs[unwindidx].in_off] = 0;
                      if (flags & B_SYNC) {
                              bwrite(bp);
                      } else {
                              bdwrite(bp);
                      }
              }
              if (deallocated) {
                      /*
                       * Restore user's disk quota because allocation failed.
                       */
                      (void)ufs_quota_free_blocks(ip, btodb(deallocated), cred);
      
                      ip->i_ffs1_blocks -= btodb(deallocated);
                      ip->i_flag |= IN_CHANGE | IN_UPDATE;
              }
              VOP_FSYNC(vp, p->p_ucred, MNT_WAIT, p);
              return (error);
      }
      
      #ifdef FFS2
      int
      ffs2_balloc(struct inode *ip, off_t off, int size, struct ucred *cred,
          int flags, struct buf **bpp)
      {
              daddr_t lbn, lastlbn, nb, newb, *blkp;
              daddr_t pref, *allocblk, allociblk[NIADDR + 1];
              daddr_t *bap, *allocib;
              int deallocated, osize, nsize, num, i, error, unwindidx, r;
              struct buf *bp, *nbp;
              struct indir indirs[NIADDR + 2];
              struct fs *fs;
              struct vnode *vp;
              struct proc *p;
              
              vp = ITOV(ip);
              fs = ip->i_fs;
              p = curproc;
              unwindidx = -1;
      
              lbn = lblkno(fs, off);
              size = blkoff(fs, off) + size;
      
              if (size > fs->fs_bsize)
                      panic("ffs2_balloc: block too big");
      
              if (bpp != NULL)
                      *bpp = NULL;
      
              if (lbn < 0)
                      return (EFBIG);
      
              /*
               * If the next write will extend the file into a new block, and the
               * file is currently composed of a fragment, this fragment has to be
               * extended to be a full block.
               */
              lastlbn = lblkno(fs, ip->i_ffs2_size);
              if (lastlbn < NDADDR && lastlbn < lbn) {
                      nb = lastlbn;
                      osize = blksize(fs, ip, nb);
                      if (osize < fs->fs_bsize && osize > 0) {
                              error = ffs_realloccg(ip, nb, ffs2_blkpref(ip,
                                  lastlbn, nb, &ip->i_ffs2_db[0]), osize,
                                  (int) fs->fs_bsize, cred, bpp, &newb);
                              if (error)
                                      return (error);
      
                              if (DOINGSOFTDEP(vp))
                                      softdep_setup_allocdirect(ip, nb, newb,
                                          ip->i_ffs2_db[nb], fs->fs_bsize, osize,
                                          bpp ? *bpp : NULL);
      
                              ip->i_ffs2_size = lblktosize(fs, nb + 1);
                              uvm_vnp_setsize(vp, ip->i_ffs2_size);
                              ip->i_ffs2_db[nb] = newb;
                              ip->i_flag |= IN_CHANGE | IN_UPDATE;
      
                              if (bpp) {
                                      if (flags & B_SYNC)
                                              bwrite(*bpp);
                                      else
                                              bawrite(*bpp);
                              }
                      }
              }
      
              /*
               * The first NDADDR blocks are direct.
               */
              if (lbn < NDADDR) {
      
                      nb = ip->i_ffs2_db[lbn];
      
                      if (nb != 0 && ip->i_ffs2_size >= lblktosize(fs, lbn + 1)) {
                              /*
                               * The direct block is already allocated and the file
                               * extends past this block, thus this must be a whole
                               * block. Just read it, if requested.
                               */
                              if (bpp != NULL) {
                                      error = bread(vp, lbn, fs->fs_bsize, bpp);
                                      if (error) {
                                              brelse(*bpp);
                                              return (error);
                                      }
                              }
      
                              return (0);
                      }
      
                      if (nb != 0) {
                              /*
                               * Consider the need to allocate a fragment.
                               */
                              osize = fragroundup(fs, blkoff(fs, ip->i_ffs2_size));
                              nsize = fragroundup(fs, size);
      
                              if (nsize <= osize) {
                                      /*
                                       * The existing block is already at least as
                                       * big as we want. Just read it, if requested.
                                       */
                                      if (bpp != NULL) {
                                              error = bread(vp, lbn, fs->fs_bsize,
                                                  bpp);
                                              if (error) {
                                                      brelse(*bpp);
                                                      return (error);
                                              }
                                              buf_adjcnt((*bpp), osize);
                                      }
      
                                      return (0);
                              } else {
                                      /*
                                       * The existing block is smaller than we want,
                                       * grow it.
                                       */
                                      error = ffs_realloccg(ip, lbn,
                                          ffs2_blkpref(ip, lbn, (int) lbn,
                                          &ip->i_ffs2_db[0]), osize, nsize, cred,
                                          bpp, &newb);
                                      if (error)
                                              return (error);
      
                                      if (DOINGSOFTDEP(vp))
                                              softdep_setup_allocdirect(ip, lbn,
                                                  newb, nb, nsize, osize,
                                                  bpp ? *bpp : NULL);
                              }
                      } else {
                              /*
                               * The block was not previously allocated, allocate a
                               * new block or fragment.
                               */
                              if (ip->i_ffs2_size < lblktosize(fs, lbn + 1))
                                      nsize = fragroundup(fs, size);
                              else
                                      nsize = fs->fs_bsize;
      
                              error = ffs_alloc(ip, lbn, ffs2_blkpref(ip, lbn,
                                  (int) lbn, &ip->i_ffs2_db[0]), nsize, cred, &newb);
                              if (error)
                                      return (error);
      
                              if (bpp != NULL) {
                                      bp = getblk(vp, lbn, fs->fs_bsize, 0, INFSLP);
                                      if (nsize < fs->fs_bsize)
                                              bp->b_bcount = nsize;
                                      bp->b_blkno = fsbtodb(fs, newb);
                                      if (flags & B_CLRBUF)
                                              clrbuf(bp);
                                      *bpp = bp;
                              }
      
                              if (DOINGSOFTDEP(vp))
                                      softdep_setup_allocdirect(ip, lbn, newb, 0,
                                          nsize, 0, bpp ? *bpp : NULL);
                      }
      
                      ip->i_ffs2_db[lbn] = newb;
                      ip->i_flag |= IN_CHANGE | IN_UPDATE;
      
                      return (0);
              }
      
              /*
               * Determine the number of levels of indirection.
               */
              pref = 0;
              error = ufs_getlbns(vp, lbn, indirs, &num);
              if (error)
                      return (error);
      
      #ifdef DIAGNOSTIC
              if (num < 1)
                      panic("ffs2_balloc: ufs_bmaparray returned indirect block");
      #endif
      
              /*
	 * Fetch the first indirect block, allocating if necessary.
               */
              --num;
              nb = ip->i_ffs2_ib[indirs[0].in_off];
              allocib = NULL;
              allocblk = allociblk;
      
              if (nb == 0) {
                      pref = ffs2_blkpref(ip, lbn, -indirs[0].in_off - 1, NULL);
                      error = ffs_alloc(ip, lbn, pref, (int) fs->fs_bsize, cred,
                          &newb);
                      if (error)
                              goto fail;
      
                      nb = newb;
                      *allocblk++ = nb;
                      bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, INFSLP);
                      bp->b_blkno = fsbtodb(fs, nb);
                      clrbuf(bp);
      
                      if (DOINGSOFTDEP(vp)) {
                              softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
                                  newb, 0, fs->fs_bsize, 0, bp);
                              bdwrite(bp);
                      } else {
                              /*
                               * Write synchronously so that indirect blocks never
                               * point at garbage.
                               */
                              error = bwrite(bp);
                              if (error)
                                      goto fail;
                      }
      
                      unwindidx = 0;
                      allocib = &ip->i_ffs2_ib[indirs[0].in_off];
                      *allocib = nb;
                      ip->i_flag |= IN_CHANGE | IN_UPDATE;
              }
      
              /*
               * Fetch through the indirect blocks, allocating as necessary.
               */
              for (i = 1;;) {
                      error = bread(vp, indirs[i].in_lbn, (int)fs->fs_bsize, &bp);
                      if (error) {
                              brelse(bp);
                              goto fail;
                      }
      
                      bap = (int64_t *) bp->b_data;
                      nb = bap[indirs[i].in_off];
      
                      if (i == num)
                              break;
      
                      i++;
      
                      if (nb != 0) {
                              brelse(bp);
                              continue;
                      }
      
                      if (pref == 0)
                              pref = ffs2_blkpref(ip, lbn, i - num - 1, NULL);
      
                      error = ffs_alloc(ip, lbn, pref, (int) fs->fs_bsize, cred,
                          &newb);
                      if (error) {
                              brelse(bp);
                              goto fail;
                      }
      
                      nb = newb;
                      *allocblk++ = nb;
                      nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, INFSLP);
                      nbp->b_blkno = fsbtodb(fs, nb);
                      clrbuf(nbp);
      
                      if (DOINGSOFTDEP(vp)) {
                              softdep_setup_allocindir_meta(nbp, ip, bp,
                                  indirs[i - 1].in_off, nb);
                              bdwrite(nbp);
                      } else {
                              /*
                               * Write synchronously so that indirect blocks never
                               * point at garbage.
                               */
                              error = bwrite(nbp);
                              if (error) {
                                      brelse(bp);
                                      goto fail;
                              }
                      }
      
                      if (unwindidx < 0)
                              unwindidx = i - 1;
      
                      bap[indirs[i - 1].in_off] = nb;
      
                      /*
                       * If required, write synchronously, otherwise use delayed
                       * write.
                       */
                      if (flags & B_SYNC)
                              bwrite(bp);
                      else
                              bdwrite(bp);
              }
      
              /*
               * Get the data block, allocating if necessary.
               */
              if (nb == 0) {
                      pref = ffs2_blkpref(ip, lbn, indirs[num].in_off, &bap[0]);
      
                      error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
                          &newb);
                      if (error) {
                              brelse(bp);
                              goto fail;
                      }
      
                      nb = newb;
                      *allocblk++ = nb;
      
                      if (bpp != NULL) {
                              nbp = getblk(vp, lbn, fs->fs_bsize, 0, INFSLP);
                              nbp->b_blkno = fsbtodb(fs, nb);
                              if (flags & B_CLRBUF)
                                      clrbuf(nbp);
                              *bpp = nbp;
                      }
      
                      if (DOINGSOFTDEP(vp))
                              softdep_setup_allocindir_page(ip, lbn, bp,
                                  indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);
      
                      bap[indirs[num].in_off] = nb;
      
                      if (allocib == NULL && unwindidx < 0)
                              unwindidx = i - 1;
      
                      /*
                       * If required, write synchronously, otherwise use delayed
                       * write.
                       */
                      if (flags & B_SYNC)
                              bwrite(bp);
                      else
                              bdwrite(bp);
      
                      return (0);
              }
      
              brelse(bp);
      
              if (bpp != NULL) {
                      if (flags & B_CLRBUF) {
                              error = bread(vp, lbn, (int)fs->fs_bsize, &nbp);
                              if (error) {
                                      brelse(nbp);
                                      goto fail;
                              }
                      } else {
                              nbp = getblk(vp, lbn, fs->fs_bsize, 0, INFSLP);
                              nbp->b_blkno = fsbtodb(fs, nb);
                              clrbuf(nbp);
                      }
      
                      *bpp = nbp;
              }
      
              return (0);
      
      fail:
              /*
               * If we have failed to allocate any blocks, simply return the error.
               * This is the usual case and avoids the need to fsync the file.
               */
              if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
                      return (error);
              /*
               * If we have failed part way through block allocation, we have to
               * deallocate any indirect blocks that we have allocated. We have to
               * fsync the file before we start to get rid of all of its
               * dependencies so that we do not leave them dangling. We have to sync
               * it at the end so that the softdep code does not find any untracked
               * changes. Although this is really slow, running out of disk space is
               * not expected to be a common occurrence. The error return from fsync
               * is ignored as we already have an error to return to the user.
               */
              VOP_FSYNC(vp, p->p_ucred, MNT_WAIT, p);
              if (unwindidx >= 0) {
                      /*
                       * First write out any buffers we've created to resolve their
                       * softdeps. This must be done in reverse order of creation so
                       * that we resolve the dependencies in one pass.
                       * Write the cylinder group buffers for these buffers too.
                       */
		for (i = num; i >= unwindidx; i--) {
			if (i == 0)
				break;
      
                              bp = getblk(vp, indirs[i].in_lbn, (int) fs->fs_bsize,
                                  0, INFSLP);
                              if (bp->b_flags & B_DELWRI) {
                                      nb = fsbtodb(fs, cgtod(fs, dtog(fs,
                                          dbtofsb(fs, bp->b_blkno))));
                                      bwrite(bp);
                                      bp = getblk(ip->i_devvp, nb,
                                          (int) fs->fs_cgsize, 0, INFSLP);
                                      if (bp->b_flags & B_DELWRI)
                                              bwrite(bp);
                                      else {
                                              bp->b_flags |= B_INVAL;
                                              brelse(bp);
                                      }
                              } else {
                                      bp->b_flags |= B_INVAL;
                                      brelse(bp);
                              }
                      }
      
                      if (DOINGSOFTDEP(vp) && unwindidx == 0) {
                              ip->i_flag |= IN_CHANGE | IN_UPDATE;
                              ffs_update(ip, 1);
                      }
      
                      /*
                       * Now that any dependencies that we created have been
                       * resolved, we can undo the partial allocation.
                       */
                      if (unwindidx == 0) {
                              *allocib = 0;
                              ip->i_flag |= IN_CHANGE | IN_UPDATE;
                              if (DOINGSOFTDEP(vp))
                                      ffs_update(ip, 1);
                      } else {
                              r = bread(vp, indirs[unwindidx].in_lbn,
                                  (int)fs->fs_bsize, &bp);
                              if (r)
                                      panic("ffs2_balloc: unwind failed");
      
                              bap = (int64_t *) bp->b_data;
                              bap[indirs[unwindidx].in_off] = 0;
                              bwrite(bp);
                      }
      
                      for (i = unwindidx + 1; i <= num; i++) {
                              bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
                                  INFSLP);
                              bp->b_flags |= B_INVAL;
                              brelse(bp);
                      }
              }
      
              for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
                      ffs_blkfree(ip, *blkp, fs->fs_bsize);
                      deallocated += fs->fs_bsize;
              }
      
              if (deallocated) {
                      /*
		 * Restore user's disk quota because allocation failed.
		 */
                      (void) ufs_quota_free_blocks(ip, btodb(deallocated), cred);
      
                      ip->i_ffs2_blocks -= btodb(deallocated);
                      ip->i_flag |= IN_CHANGE | IN_UPDATE;
              }
              VOP_FSYNC(vp, p->p_ucred, MNT_WAIT, p);
              return (error);
      }
      #endif /* FFS2 */
      
      /*
       * Balloc defines the structure of file system storage by allocating the
       * physical blocks given the inode and the logical block number in a file.
       */
      int
      ffs_balloc(struct inode *ip, off_t off, int size, struct ucred *cred,
          int flags, struct buf **bpp)
{
#ifdef FFS2
	if (ip->i_fs->fs_magic == FS_UFS2_MAGIC)
		return (ffs2_balloc(ip, off, size, cred, flags, bpp));
	else
#endif
		return (ffs1_balloc(ip, off, size, cred, flags, bpp));
      }
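
/*
 * Editor's illustration (not part of the original source): the shape
 * of a typical ffs_balloc() consumer.  A write path asks for the
 * buffer backing the byte range it is about to dirty, copies the data
 * in, and schedules the buffer for write-out.  Names and error
 * handling here are schematic, not the real ffs_write().
 */
static int
example_write_chunk(struct inode *ip, off_t off, int len,
    struct ucred *cred, int flags, struct uio *uio)
{
	struct buf *bp;
	int error;

	error = ffs_balloc(ip, off, len, cred, flags, &bp);
	if (error)
		return (error);
	error = uiomove((char *)bp->b_data + blkoff(ip->i_fs, off),
	    len, uio);
	if (error)
		brelse(bp);		/* give the buffer back on error */
	else
		bdwrite(bp);		/* delayed write of the dirty block */
	return (error);
}
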
      /*        $OpenBSD: vfs_cache.c,v 1.57 2018/06/04 19:42:54 kn Exp $        */
      /*        $NetBSD: vfs_cache.c,v 1.13 1996/02/04 02:18:09 christos Exp $        */
      
      /*
       * Copyright (c) 1989, 1993
       *        The Regents of the University of California.  All rights reserved.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       * 3. Neither the name of the University nor the names of its contributors
       *    may be used to endorse or promote products derived from this software
       *    without specific prior written permission.
       *
       * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
       * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
       * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
       * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
       * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
       * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
       * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
       * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
       * SUCH DAMAGE.
       *
       *        @(#)vfs_cache.c        8.3 (Berkeley) 8/22/94
       */
      
      #include <sys/param.h>
      #include <sys/systm.h>
      #include <sys/time.h>
      #include <sys/mount.h>
      #include <sys/vnode.h>
      #include <sys/lock.h>
      #include <sys/namei.h>
      #include <sys/errno.h>
      #include <sys/pool.h>
      
      /*
       * TODO: namecache access should really be locked.
       */
      
      /*
       * For simplicity (and economy of storage), names longer than
       * a maximum length of NAMECACHE_MAXLEN are not cached; they occur
       * infrequently in any case, and are almost never of interest.
       *
       * Upon reaching the last segment of a path, if the reference
       * is for DELETE, or NOCACHE is set (rewrite), and the
       * name is located in the cache, it will be dropped.
       */
      
      /*
       * Structures associated with name caching.
       */
      long        numcache;        /* total number of cache entries allocated */
      long        numneg;                /* number of negative cache entries */
      
      TAILQ_HEAD(, namecache) nclruhead;        /* Regular Entry LRU chain */
      TAILQ_HEAD(, namecache) nclruneghead;        /* Negative Entry LRU chain */
      struct        nchstats nchstats;                /* cache effectiveness statistics */
      
      int doingcache = 1;                        /* 1 => enable the cache */
      
      struct pool nch_pool;
      
      void cache_zap(struct namecache *);
      u_long nextvnodeid;
      
      static inline int
namecache_compare(const struct namecache *n1, const struct namecache *n2)
{
	if (n1->nc_nlen == n2->nc_nlen)
                      return (memcmp(n1->nc_name, n2->nc_name, n1->nc_nlen));
              else
                      return (n1->nc_nlen - n2->nc_nlen);
      }
      
RBT_PROTOTYPE(namecache_rb_cache, namecache, n_rbcache, namecache_compare);
RBT_GENERATE(namecache_rb_cache, namecache, n_rbcache, namecache_compare);
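
/*
 * Editor's illustration (not part of the original source): the total
 * order namecache_compare() induces.  Shorter names sort before longer
 * ones, and only equal-length names are compared byte-wise, so the
 * memcmp() never reads past the shorter name.  Standalone sketch:
 */
#include <string.h>

struct nc_ex {				/* stand-in for struct namecache */
	int		 nlen;
	const char	*name;
};

static int
nc_ex_cmp(const struct nc_ex *a, const struct nc_ex *b)
{
	if (a->nlen == b->nlen)
		return memcmp(a->name, b->name, a->nlen);
	return a->nlen - b->nlen;
}
/* "a" vs "etc" is negative purely on length; "etc" vs "usr" falls
 * through to memcmp() and is negative because 'e' < 'u'. */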
      
      void
      cache_tree_init(struct namecache_rb_cache *tree)
{
              RBT_INIT(namecache_rb_cache, tree);
      }
      
      /*
       * blow away a namecache entry
       */
      void
      cache_zap(struct namecache *ncp)
{
	struct vnode *dvp = NULL;

	if (ncp->nc_vp != NULL) {
		TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
		numcache--;
	} else {
		TAILQ_REMOVE(&nclruneghead, ncp, nc_neg);
		numneg--;
	}
	if (ncp->nc_dvp) {
		RBT_REMOVE(namecache_rb_cache, &ncp->nc_dvp->v_nc_tree, ncp);
                      if (RBT_EMPTY(namecache_rb_cache, &ncp->nc_dvp->v_nc_tree))
                              dvp = ncp->nc_dvp;
              }
	if (ncp->nc_vp && (ncp->nc_vpid == ncp->nc_vp->v_id)) {
		if (ncp->nc_vp != ncp->nc_dvp &&
		    ncp->nc_vp->v_type == VDIR &&
		    (ncp->nc_nlen > 2 ||
			(ncp->nc_nlen > 1 &&
			    ncp->nc_name[1] != '.') ||
			(ncp->nc_nlen > 0 &&
			    ncp->nc_name[0] != '.'))) {
                              TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_me);
                      }
              }
              pool_put(&nch_pool, ncp);
	if (dvp)
		vdrop(dvp);
      }
      
      /*
       * Look for a name in the cache.
       * dvp points to the directory to search. The componentname cnp holds
       * the information on the entry being sought, such as its length
       * and its name. If the lookup succeeds, vpp is set to point to the vnode
       * and an error of 0 is returned. If the lookup determines the name does
       * not exist (negative caching) an error of ENOENT is returned. If the
       * lookup fails, an error of -1 is returned.
       */
      int
      cache_lookup(struct vnode *dvp, struct vnode **vpp,
          struct componentname *cnp)
{
              struct namecache *ncp;
              struct namecache n;
              struct vnode *vp;
              u_long vpid;
              int error;
      
              *vpp = NULL;
      
              if (!doingcache) {
                      cnp->cn_flags &= ~MAKEENTRY;
                      return (-1);
              }
              if (cnp->cn_namelen > NAMECACHE_MAXLEN) {
		nchstats.ncs_long++;
                      cnp->cn_flags &= ~MAKEENTRY;
                      return (-1);
              }
      
	/* lookup in directory vnode's red-black tree */
              n.nc_nlen = cnp->cn_namelen;
              memcpy(n.nc_name, cnp->cn_nameptr, n.nc_nlen);
              ncp = RBT_FIND(namecache_rb_cache, &dvp->v_nc_tree, &n);
      
              if (ncp == NULL) {
		nchstats.ncs_miss++;
		return (-1);
	}
	if ((cnp->cn_flags & MAKEENTRY) == 0) {
                      nchstats.ncs_badhits++;
                      goto remove;
              } else if (ncp->nc_vp == NULL) {
		if (cnp->cn_nameiop != CREATE ||
                          (cnp->cn_flags & ISLASTCN) == 0) {
                              nchstats.ncs_neghits++;
                              /*
			 * Move this slot to end of the negative LRU chain.
			 */
			if (TAILQ_NEXT(ncp, nc_neg) != NULL) {
				TAILQ_REMOVE(&nclruneghead, ncp, nc_neg);
                                      TAILQ_INSERT_TAIL(&nclruneghead, ncp,
                                          nc_neg);
                              }
                              return (ENOENT);
                      } else {
                              nchstats.ncs_badhits++;
                              goto remove;
                      }
	} else if (ncp->nc_vpid != ncp->nc_vp->v_id) {
                      nchstats.ncs_falsehits++;
                      goto remove;
              }
      
              /*
               * Move this slot to end of the regular LRU chain.
               */
	if (TAILQ_NEXT(ncp, nc_lru) != NULL) {
		TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
                      TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
              }
      
              vp = ncp->nc_vp;
              vpid = vp->v_id;
              if (vp == dvp) {        /* lookup on "." */
		vref(dvp);
                      error = 0;
              } else if (cnp->cn_flags & ISDOTDOT) {
                      VOP_UNLOCK(dvp);
                      cnp->cn_flags |= PDIRUNLOCK;
                      error = vget(vp, LK_EXCLUSIVE);
                      /*
                       * If the above vget() succeeded and both LOCKPARENT and
                       * ISLASTCN is set, lock the directory vnode as well.
                       */
		if (!error && (~cnp->cn_flags & (LOCKPARENT|ISLASTCN)) == 0) {
			if ((error = vn_lock(dvp, LK_EXCLUSIVE)) != 0) {
				vput(vp);
				return (error);
			}
			cnp->cn_flags &= ~PDIRUNLOCK;
                      }
              } else {
                      error = vget(vp, LK_EXCLUSIVE);
                      /*
                       * If the above vget() failed or either of LOCKPARENT or
                       * ISLASTCN is set, unlock the directory vnode.
                       */
		if (error || (~cnp->cn_flags & (LOCKPARENT|ISLASTCN)) != 0) {
                              VOP_UNLOCK(dvp);
                              cnp->cn_flags |= PDIRUNLOCK;
                      }
              }
      
              /*
               * Check that the lock succeeded, and that the capability number did
               * not change while we were waiting for the lock.
               */
	if (error || vpid != vp->v_id) {
                      if (!error) {
                              vput(vp);
                              nchstats.ncs_falsehits++;
                      } else
                              nchstats.ncs_badhits++;
                      /*
                       * The parent needs to be locked when we return to VOP_LOOKUP().
                       * The `.' case here should be extremely rare (if it can happen
                       * at all), so we don't bother optimizing out the unlock/relock.
                       */
                      if (vp == dvp || error ||
                          (~cnp->cn_flags & (LOCKPARENT|ISLASTCN)) != 0) {
                              if ((error = vn_lock(dvp, LK_EXCLUSIVE)) != 0)
                                      return (error);
                              cnp->cn_flags &= ~PDIRUNLOCK;
                      }
                      return (-1);
              }
      
	nchstats.ncs_goodhits++;
              *vpp = vp;
              return (0);
      
      remove:
              /*
               * Last component and we are renaming or deleting,
               * the cache entry is invalid, or otherwise don't
               * want cache entry to exist.
               */
              cache_zap(ncp);
              return (-1);
      }
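
/*
 * Editor's illustration (not part of the original source): how a
 * consumer distinguishes the three outcomes documented above.  The
 * real callers are the per-filesystem lookup routines (e.g.
 * ufs_lookup()); this sketch ignores the vnode locking protocol.
 */
static int
example_lookup(struct vnode *dvp, struct vnode **vpp,
    struct componentname *cnp)
{
	int error;

	error = cache_lookup(dvp, vpp, cnp);
	if (error == 0)			/* positive hit: *vpp is valid */
		return (0);
	if (error == ENOENT)		/* negative hit: known absent */
		return (ENOENT);
	/* error == -1: cache miss, consult the filesystem itself */
	return (VOP_LOOKUP(dvp, vpp, cnp));
}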
      
      /*
       * Scan cache looking for name of directory entry pointing at vp.
       *
       * Fill in dvpp.
       *
       * If bufp is non-NULL, also place the name in the buffer which starts
       * at bufp, immediately before *bpp, and move bpp backwards to point
       * at the start of it.  (Yes, this is a little baroque, but it's done
       * this way to cater to the whims of getcwd).
       *
       * Returns 0 on success, -1 on cache miss, positive errno on failure.
       *
       * TODO: should we return *dvpp locked?
       */
      
      int
      cache_revlookup(struct vnode *vp, struct vnode **dvpp, char **bpp, char *bufp)
      {
              struct namecache *ncp;
              struct vnode *dvp = NULL;
              char *bp;
      
              if (!doingcache)
                      goto out;
              TAILQ_FOREACH(ncp, &vp->v_cache_dst, nc_me) {
                      dvp = ncp->nc_dvp;
                      if (dvp && dvp != vp && ncp->nc_dvpid == dvp->v_id)
                              goto found;
              }
              goto miss;
      found:
      #ifdef DIAGNOSTIC
              if (ncp->nc_nlen == 1 &&
                  ncp->nc_name[0] == '.')
                      panic("cache_revlookup: found entry for .");
              if (ncp->nc_nlen == 2 &&
                  ncp->nc_name[0] == '.' &&
                  ncp->nc_name[1] == '.')
                      panic("cache_revlookup: found entry for ..");
      #endif
              nchstats.ncs_revhits++;
      
              if (bufp != NULL) {
                      bp = *bpp;
                      bp -= ncp->nc_nlen;
                      if (bp <= bufp) {
                              *dvpp = NULL;
                              return (ERANGE);
                      }
                      memcpy(bp, ncp->nc_name, ncp->nc_nlen);
                      *bpp = bp;
              }
      
              *dvpp = dvp;
      
              /*
               * XXX: Should we vget() here to have more
               * consistent semantics with cache_lookup()?
               */
              return (0);
      
      miss:
              nchstats.ncs_revmiss++;
      out:
              *dvpp = NULL;
              return (-1);
      }
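
/*
 * Editor's illustration (not part of the original source): the
 * backwards buffer convention cache_revlookup() serves.  getcwd-style
 * callers fill the path right-to-left, prepending each component so
 * *bpp always points at the start of the text built so far.
 * Standalone sketch with hypothetical names.
 */
#include <stdio.h>
#include <string.h>

static int
prepend(char **bpp, char *bufp, const char *name, int len)
{
	char *bp = *bpp - len;

	if (bp <= bufp)			/* would underrun the buffer */
		return -1;		/* ERANGE in the kernel version */
	memcpy(bp, name, len);
	*bpp = bp;
	return 0;
}

int
main(void)
{
	char buf[64], *bp = buf + sizeof(buf) - 1;

	*bp = '\0';
	prepend(&bp, buf, "passwd", 6);
	prepend(&bp, buf, "/", 1);
	prepend(&bp, buf, "etc", 3);
	prepend(&bp, buf, "/", 1);
	printf("%s\n", bp);		/* prints "/etc/passwd" */
	return 0;
}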
      
      /*
       * Add an entry to the cache
       */
      void
      cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
{
              struct namecache *ncp, *lncp;
      
              if (!doingcache || cnp->cn_namelen > NAMECACHE_MAXLEN)
                      return;
      
              /*
               * allocate, or recycle (free and allocate) an ncp.
               */
	if (numcache >= initialvnodes) {
                      if ((ncp = TAILQ_FIRST(&nclruhead)) != NULL)
                              cache_zap(ncp);
                      else if ((ncp = TAILQ_FIRST(&nclruneghead)) != NULL)
                              cache_zap(ncp);
                      else
                              panic("wtf? leak?");
              }
              ncp = pool_get(&nch_pool, PR_WAITOK|PR_ZERO);
      
              /* grab the vnode we just found */
              ncp->nc_vp = vp;
	if (vp)
		ncp->nc_vpid = vp->v_id;

	/* fill in cache info */
	ncp->nc_dvp = dvp;
	ncp->nc_dvpid = dvp->v_id;
	ncp->nc_nlen = cnp->cn_namelen;
	memcpy(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen);
	if (RBT_EMPTY(namecache_rb_cache, &dvp->v_nc_tree)) {
		vhold(dvp);
              }
              if ((lncp = RBT_INSERT(namecache_rb_cache, &dvp->v_nc_tree, ncp))
                  != NULL) {
                      /* someone has raced us and added a different entry
                       * for the same vnode (different ncp) - we don't need
                       * this entry, so free it and we are done.
                       */
                      pool_put(&nch_pool, ncp);
                      /* we know now dvp->v_nc_tree is not empty, no need
                       * to vdrop here
                       */
                      goto done;
              }
              if (vp) {
                      TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
                      numcache++;
                      /* don't put . or .. in the reverse map */
		if (vp != dvp && vp->v_type == VDIR &&
		    (ncp->nc_nlen > 2 ||
			(ncp->nc_nlen > 1 &&
			    ncp->nc_name[1] != '.') ||
			(ncp->nc_nlen > 0 &&
			    ncp->nc_name[0] != '.')))
			TAILQ_INSERT_TAIL(&vp->v_cache_dst, ncp,
			    nc_me);
	} else {
		TAILQ_INSERT_TAIL(&nclruneghead, ncp, nc_neg);
		numneg++;
	}
	if (numneg > initialvnodes) {
                      if ((ncp = TAILQ_FIRST(&nclruneghead))
                          != NULL)
                              cache_zap(ncp);
              }
      done:
              return;
      }
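
/*
 * Editor's illustration (not part of the original source): the
 * recycle-from-the-front LRU discipline that cache_enter() and
 * cache_lookup() implement with the two TAILQs above, reduced to a
 * minimal standalone version with hypothetical names.
 */
#include <sys/queue.h>
#include <stdio.h>

struct lru_entry {
	int			key;
	TAILQ_ENTRY(lru_entry)	lru;
};

TAILQ_HEAD(, lru_entry) ex_lruhead = TAILQ_HEAD_INITIALIZER(ex_lruhead);

/* A hit moves the entry to the tail; the head stays least-recent. */
static void
lru_touch(struct lru_entry *e)
{
	TAILQ_REMOVE(&ex_lruhead, e, lru);
	TAILQ_INSERT_TAIL(&ex_lruhead, e, lru);
}

/* When full, evict from the head, exactly as cache_enter() zaps
 * TAILQ_FIRST(&nclruhead) above. */
static struct lru_entry *
lru_evict(void)
{
	struct lru_entry *e = TAILQ_FIRST(&ex_lruhead);

	if (e != NULL)
		TAILQ_REMOVE(&ex_lruhead, e, lru);
	return e;
}

int
main(void)
{
	struct lru_entry a = { 1 }, b = { 2 };

	TAILQ_INSERT_TAIL(&ex_lruhead, &a, lru);
	TAILQ_INSERT_TAIL(&ex_lruhead, &b, lru);
	lru_touch(&a);				/* a becomes most-recent */
	printf("evicted key %d\n", lru_evict()->key);	/* prints 2 */
	return 0;
}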
      
      
      /*
       * Name cache initialization, from vfs_init() when we are booting
       */
      void
      nchinit(void)
      {
              TAILQ_INIT(&nclruhead);
              TAILQ_INIT(&nclruneghead);
              pool_init(&nch_pool, sizeof(struct namecache), 0, IPL_NONE, PR_WAITOK,
                  "nchpl", NULL);
      }
      
      /*
       * Cache flush, a particular vnode; called when a vnode is renamed to
       * hide entries that would now be invalid
       */
      void
      cache_purge(struct vnode *vp)
{
              struct namecache *ncp;
      
              /* We should never have destinations cached for a non-VDIR vnode. */
	KASSERT(vp->v_type == VDIR || TAILQ_EMPTY(&vp->v_cache_dst));

	while ((ncp = TAILQ_FIRST(&vp->v_cache_dst)))
		cache_zap(ncp);
	while ((ncp = RBT_ROOT(namecache_rb_cache, &vp->v_nc_tree)))
                      cache_zap(ncp);
      
              /* XXX this blows goats */
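	/*
	 * Bumping v_id invalidates any entry elsewhere that still carries
	 * the old value in nc_vpid/nc_dvpid; 0 is skipped on wraparound
	 * so a zeroed entry can never produce a false match.
	 */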
              vp->v_id = ++nextvnodeid;
	if (vp->v_id == 0)
                      vp->v_id = ++nextvnodeid;
      }
      
      /*
       * Cache flush, a whole filesystem; called when filesys is umounted to
       * remove entries that would now be invalid
       */
      void
      cache_purgevfs(struct mount *mp)
      {
              struct namecache *ncp, *nxtcp;
      
              /* whack the regular entries */
              TAILQ_FOREACH_SAFE(ncp, &nclruhead, nc_lru, nxtcp) {
                      if (ncp->nc_dvp == NULL || ncp->nc_dvp->v_mount != mp)
                              continue;
                      /* free the resources we had */
                      cache_zap(ncp);
              }
              /* whack the negative entries */
              TAILQ_FOREACH_SAFE(ncp, &nclruneghead, nc_neg, nxtcp) {
                      if (ncp->nc_dvp == NULL || ncp->nc_dvp->v_mount != mp)
                              continue;
                      /* free the resources we had */
                      cache_zap(ncp);
              }
      }
      /*        $OpenBSD: in6_var.h,v 1.72 2018/05/06 15:21:25 florian Exp $        */
      /*        $KAME: in6_var.h,v 1.55 2001/02/16 12:49:45 itojun Exp $        */
      
      /*
       * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
       * All rights reserved.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       * 3. Neither the name of the project nor the names of its contributors
       *    may be used to endorse or promote products derived from this software
       *    without specific prior written permission.
       *
       * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
       * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
       * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
       * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
       * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
       * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
       * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
       * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
       * SUCH DAMAGE.
       */
      
      /*
       * Copyright (c) 1985, 1986, 1993
       *        The Regents of the University of California.  All rights reserved.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       * 3. Neither the name of the University nor the names of its contributors
       *    may be used to endorse or promote products derived from this software
       *    without specific prior written permission.
       *
       * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
       * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
       * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
       * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
       * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
       * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
       * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
       * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
       * SUCH DAMAGE.
       *
       *        @(#)in_var.h        8.1 (Berkeley) 6/10/93
       */
      
      #ifndef _NETINET6_IN6_VAR_H_
      #define _NETINET6_IN6_VAR_H_
      
      /*
       * Interface address, Internet version.  One of these structures
       * is allocated for each interface with an Internet address.
       * The ifaddr structure contains the protocol-independent part
       * of the structure and is assumed to be first.
       */
      
      /*
 * pltime/vltime are just for future reference (required to implement the
 * 2-hour rule for hosts).  They should never be modified by nd6_timeout()
 * or anywhere else.
       *        userland -> kernel: accept pltime/vltime
       *        kernel -> userland: throw up everything
       *        in kernel: modify preferred/expire only
       */
      struct in6_addrlifetime {
              time_t ia6t_expire;        /* valid lifetime expiration time */
              time_t ia6t_preferred;        /* preferred lifetime expiration time */
              u_int32_t ia6t_vltime;        /* valid lifetime */
              u_int32_t ia6t_pltime;        /* prefix lifetime */
      };
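
/*
 * A minimal sketch (hypothetical helper, not part of this header) of the
 * kernel-side rule above: preferred/expire are derived from the
 * userland-supplied pltime/vltime, with the conventional all-ones value
 * treated as "infinite" (an expire of 0).
 */
static __inline void
example_in6_lifetime_update(struct in6_addrlifetime *lt, time_t now)
{
	lt->ia6t_expire = (lt->ia6t_vltime == 0xffffffff) ?
	    0 : now + lt->ia6t_vltime;		/* end of valid lifetime */
	lt->ia6t_preferred = (lt->ia6t_pltime == 0xffffffff) ?
	    0 : now + lt->ia6t_pltime;		/* end of preferred lifetime */
}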
      
      #ifdef _KERNEL
      struct nd_ifinfo;
      struct in6_ifextra {
              struct nd_ifinfo *nd_ifinfo;
              void *rs_lhcookie;
              int nprefixes;
              int ndefrouters;
      };
      
      struct        in6_ifaddr {
              struct        ifaddr ia_ifa;                /* protocol-independent info */
      #define        ia_ifp                ia_ifa.ifa_ifp
      #define        ia_flags        ia_ifa.ifa_flags
      
              struct        sockaddr_in6 ia_addr;        /* interface address */
              struct        sockaddr_in6 ia_dstaddr; /* space for destination addr */
              struct        sockaddr_in6 ia_prefixmask; /* prefix mask */
              TAILQ_ENTRY(in6_ifaddr) ia_list;        /* list of IP6 addresses */
              int        ia6_flags;
      
              struct in6_addrlifetime ia6_lifetime;
              time_t        ia6_updatetime;
      
              /* multicast addresses joined from the kernel */
              LIST_HEAD(, in6_multi_mship) ia6_memberships;
      };
      #endif /* _KERNEL */
      
      /*
       * IPv6 interface statistics, as defined in RFC2465 Ipv6IfStatsEntry (p12).
       */
      struct in6_ifstat {
              u_int64_t ifs6_in_receive;        /* # of total input datagram */
              u_int64_t ifs6_in_hdrerr;        /* # of datagrams with invalid hdr */
              u_int64_t ifs6_in_toobig;        /* # of datagrams exceeded MTU */
              u_int64_t ifs6_in_noroute;        /* # of datagrams with no route */
              u_int64_t ifs6_in_addrerr;        /* # of datagrams with invalid dst */
              u_int64_t ifs6_in_protounknown;        /* # of datagrams with unknown proto */
                                              /* NOTE: increment on final dst if */
              u_int64_t ifs6_in_truncated;        /* # of truncated datagrams */
              u_int64_t ifs6_in_discard;        /* # of discarded datagrams */
                                              /* NOTE: fragment timeout is not here */
              u_int64_t ifs6_in_deliver;        /* # of datagrams delivered to ULP */
                                              /* NOTE: increment on final dst if */
              u_int64_t ifs6_out_forward;        /* # of datagrams forwarded */
                                              /* NOTE: increment on outgoing if */
              u_int64_t ifs6_out_request;        /* # of outgoing datagrams from ULP */
					/* NOTE: does not include forwards */
              u_int64_t ifs6_out_discard;        /* # of discarded datagrams */
              u_int64_t ifs6_out_fragok;        /* # of datagrams fragmented */
              u_int64_t ifs6_out_fragfail;        /* # of datagrams failed on fragment */
              u_int64_t ifs6_out_fragcreat;        /* # of fragment datagrams */
                                              /* NOTE: this is # after fragment */
              u_int64_t ifs6_reass_reqd;        /* # of incoming fragmented packets */
                                              /* NOTE: increment on final dst if */
              u_int64_t ifs6_reass_ok;        /* # of reassembled packets */
                                              /* NOTE: this is # after reass */
                                              /* NOTE: increment on final dst if */
              u_int64_t ifs6_reass_fail;        /* # of reass failures */
                                              /* NOTE: may not be packet count */
                                              /* NOTE: increment on final dst if */
              u_int64_t ifs6_in_mcast;        /* # of inbound multicast datagrams */
              u_int64_t ifs6_out_mcast;        /* # of outbound multicast datagrams */
      };
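
/*
 * Sketch (hypothetical helper; the kernel has its own accessors): the
 * counters above are per-interface and are bumped on the path each NOTE
 * describes, e.g. on input:
 */
static __inline void
example_ifs6_input(struct in6_ifstat *ifs6, int truncated)
{
	ifs6->ifs6_in_receive++;		/* every input datagram */
	if (truncated)
		ifs6->ifs6_in_truncated++;	/* shorter than header claims */
}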
      
      /*
       * ICMPv6 interface statistics, as defined in RFC2466 Ipv6IfIcmpEntry.
       * XXX: I'm not sure if this file is the right place for this structure...
       */
      struct icmp6_ifstat {
              /*
               * Input statistics
               */
              /* ipv6IfIcmpInMsgs, total # of input messages */
              u_int64_t ifs6_in_msg;
              /* ipv6IfIcmpInErrors, # of input error messages */
              u_int64_t ifs6_in_error;
              /* ipv6IfIcmpInDestUnreachs, # of input dest unreach errors */
              u_int64_t ifs6_in_dstunreach;
              /* ipv6IfIcmpInAdminProhibs, # of input administratively prohibited errs */
              u_int64_t ifs6_in_adminprohib;
              /* ipv6IfIcmpInTimeExcds, # of input time exceeded errors */
              u_int64_t ifs6_in_timeexceed;
              /* ipv6IfIcmpInParmProblems, # of input parameter problem errors */
              u_int64_t ifs6_in_paramprob;
              /* ipv6IfIcmpInPktTooBigs, # of input packet too big errors */
              u_int64_t ifs6_in_pkttoobig;
              /* ipv6IfIcmpInEchos, # of input echo requests */
              u_int64_t ifs6_in_echo;
              /* ipv6IfIcmpInEchoReplies, # of input echo replies */
              u_int64_t ifs6_in_echoreply;
              /* ipv6IfIcmpInRouterSolicits, # of input router solicitations */
              u_int64_t ifs6_in_routersolicit;
              /* ipv6IfIcmpInRouterAdvertisements, # of input router advertisements */
              u_int64_t ifs6_in_routeradvert;
              /* ipv6IfIcmpInNeighborSolicits, # of input neighbor solicitations */
              u_int64_t ifs6_in_neighborsolicit;
              /* ipv6IfIcmpInNeighborAdvertisements, # of input neighbor advertisements */
              u_int64_t ifs6_in_neighboradvert;
              /* ipv6IfIcmpInRedirects, # of input redirects */
              u_int64_t ifs6_in_redirect;
              /* ipv6IfIcmpInGroupMembQueries, # of input MLD queries */
              u_int64_t ifs6_in_mldquery;
              /* ipv6IfIcmpInGroupMembResponses, # of input MLD reports */
              u_int64_t ifs6_in_mldreport;
              /* ipv6IfIcmpInGroupMembReductions, # of input MLD done */
              u_int64_t ifs6_in_mlddone;
      
              /*
	 * Output statistics.  We should solve the unresolved routing problem...
               */
              /* ipv6IfIcmpOutMsgs, total # of output messages */
              u_int64_t ifs6_out_msg;
              /* ipv6IfIcmpOutErrors, # of output error messages */
              u_int64_t ifs6_out_error;
              /* ipv6IfIcmpOutDestUnreachs, # of output dest unreach errors */
              u_int64_t ifs6_out_dstunreach;
              /* ipv6IfIcmpOutAdminProhibs, # of output administratively prohibited errs */
              u_int64_t ifs6_out_adminprohib;
              /* ipv6IfIcmpOutTimeExcds, # of output time exceeded errors */
              u_int64_t ifs6_out_timeexceed;
              /* ipv6IfIcmpOutParmProblems, # of output parameter problem errors */
              u_int64_t ifs6_out_paramprob;
              /* ipv6IfIcmpOutPktTooBigs, # of output packet too big errors */
              u_int64_t ifs6_out_pkttoobig;
              /* ipv6IfIcmpOutEchos, # of output echo requests */
              u_int64_t ifs6_out_echo;
              /* ipv6IfIcmpOutEchoReplies, # of output echo replies */
              u_int64_t ifs6_out_echoreply;
              /* ipv6IfIcmpOutRouterSolicits, # of output router solicitations */
              u_int64_t ifs6_out_routersolicit;
              /* ipv6IfIcmpOutRouterAdvertisements, # of output router advertisements */
              u_int64_t ifs6_out_routeradvert;
              /* ipv6IfIcmpOutNeighborSolicits, # of output neighbor solicitations */
              u_int64_t ifs6_out_neighborsolicit;
              /* ipv6IfIcmpOutNeighborAdvertisements, # of output neighbor advertisements */
              u_int64_t ifs6_out_neighboradvert;
              /* ipv6IfIcmpOutRedirects, # of output redirects */
              u_int64_t ifs6_out_redirect;
              /* ipv6IfIcmpOutGroupMembQueries, # of output MLD queries */
              u_int64_t ifs6_out_mldquery;
              /* ipv6IfIcmpOutGroupMembResponses, # of output MLD reports */
              u_int64_t ifs6_out_mldreport;
              /* ipv6IfIcmpOutGroupMembReductions, # of output MLD done */
              u_int64_t ifs6_out_mlddone;
      };
      
      struct        in6_ifreq {
              char        ifr_name[IFNAMSIZ];
              union {
                      struct        sockaddr_in6 ifru_addr;
                      struct        sockaddr_in6 ifru_dstaddr;
                      short        ifru_flags;
                      int        ifru_flags6;
                      int        ifru_metric;
                      caddr_t        ifru_data;
                      struct in6_addrlifetime ifru_lifetime;
                      struct in6_ifstat ifru_stat;
                      struct icmp6_ifstat ifru_icmp6stat;
              } ifr_ifru;
      };
      
      struct        in6_aliasreq {
              char        ifra_name[IFNAMSIZ];
              union {
                      struct        sockaddr_in6 ifrau_addr;
                      int        ifrau_align;
               } ifra_ifrau;
      #ifndef ifra_addr
      #define ifra_addr        ifra_ifrau.ifrau_addr
      #endif
              struct        sockaddr_in6 ifra_dstaddr;
              struct        sockaddr_in6 ifra_prefixmask;
              int        ifra_flags;
              struct in6_addrlifetime ifra_lifetime;
      };
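
/*
 * Userland sketch: adding an address the way ifconfig(8) does, by filling
 * an in6_aliasreq and issuing SIOCAIFADDR_IN6 (defined below).  Assumes s
 * is an AF_INET6 socket and sin6/mask6 were prepared by the caller; the
 * all-ones lifetime is the conventional "infinite" value.
 */
#if 0	/* illustrative only; belongs in a userland program */
	struct in6_aliasreq ifra;

	memset(&ifra, 0, sizeof(ifra));
	strlcpy(ifra.ifra_name, "em0", sizeof(ifra.ifra_name));
	ifra.ifra_addr = *sin6;				/* address to add */
	ifra.ifra_prefixmask = *mask6;			/* its prefix mask */
	ifra.ifra_lifetime.ia6t_vltime = 0xffffffff;
	ifra.ifra_lifetime.ia6t_pltime = 0xffffffff;
	if (ioctl(s, SIOCAIFADDR_IN6, &ifra) == -1)
		err(1, "SIOCAIFADDR_IN6");
#endif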
      
      /*
       * Given a pointer to an in6_ifaddr (ifaddr),
       * return a pointer to the addr as a sockaddr_in6
       */
      #define IA6_IN6(ia)        (&((ia)->ia_addr.sin6_addr))
      #define IA6_DSTIN6(ia)        (&((ia)->ia_dstaddr.sin6_addr))
      #define IA6_MASKIN6(ia)        (&((ia)->ia_prefixmask.sin6_addr))
      #define IA6_SIN6(ia)        (&((ia)->ia_addr))
      #define IA6_DSTSIN6(ia)        (&((ia)->ia_dstaddr))
      #define IFA_IN6(x)        (&((struct sockaddr_in6 *)((x)->ifa_addr))->sin6_addr)
      #define IFA_DSTIN6(x)        (&((struct sockaddr_in6 *)((x)->ifa_dstaddr))->sin6_addr)
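
/*
 * Usage sketch (kernel side, hypothetical): given an ifaddr already in
 * hand, the accessors above replace open-coded sockaddr casts.
 */
static __inline int
example_ifa_is_linklocal(struct ifaddr *ifa)
{
	if (ifa->ifa_addr->sa_family != AF_INET6)
		return (0);
	return (IN6_IS_ADDR_LINKLOCAL(IFA_IN6(ifa)));
}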
      
      #define SIOCDIFADDR_IN6                 _IOW('i', 25, struct in6_ifreq)
      #define SIOCAIFADDR_IN6                 _IOW('i', 26, struct in6_aliasreq)
      
      #define SIOCGIFDSTADDR_IN6        _IOWR('i', 34, struct in6_ifreq)
      #define SIOCGIFNETMASK_IN6        _IOWR('i', 37, struct in6_ifreq)
      
      #define SIOCGIFAFLAG_IN6        _IOWR('i', 73, struct in6_ifreq)
      
      #define SIOCGIFINFO_IN6                _IOWR('i', 108, struct in6_ndireq)
      #define SIOCGNBRINFO_IN6        _IOWR('i', 78, struct in6_nbrinfo)
      
      #define SIOCGIFALIFETIME_IN6        _IOWR('i', 81, struct in6_ifreq)
      
      #define SIOCGETSGCNT_IN6        _IOWR('u', 106, struct sioc_sg_req6)
      #define SIOCGETMIFCNT_IN6        _IOWR('u', 107, struct sioc_mif_req6)
      
      #define IN6_IFF_ANYCAST                0x01        /* anycast address */
      #define IN6_IFF_TENTATIVE        0x02        /* tentative address */
      #define IN6_IFF_DUPLICATED        0x04        /* DAD detected duplicate */
      #define IN6_IFF_DETACHED        0x08        /* may be detached from the link */
      #define IN6_IFF_DEPRECATED        0x10        /* deprecated address */
      #define IN6_IFF_AUTOCONF        0x40        /* autoconfigurable address. */
      #define IN6_IFF_PRIVACY                0x80        /* RFC 4941 temporary address */
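
/*
 * Userland usage sketch: reading the per-address flags with
 * SIOCGIFAFLAG_IN6 (declared above) to see whether DAD has finished.
 * addr_is_tentative() is a hypothetical helper; error handling is
 * minimal.
 */
#if 0	/* illustrative only; belongs in a userland program */
static int
addr_is_tentative(const char *ifname, const struct sockaddr_in6 *sin6)
{
	struct in6_ifreq ifr6;
	int s, tentative = -1;

	if ((s = socket(AF_INET6, SOCK_DGRAM, 0)) == -1)
		return (-1);
	memset(&ifr6, 0, sizeof(ifr6));
	strlcpy(ifr6.ifr_name, ifname, sizeof(ifr6.ifr_name));
	ifr6.ifr_ifru.ifru_addr = *sin6;	/* address of interest */
	if (ioctl(s, SIOCGIFAFLAG_IN6, &ifr6) != -1)
		tentative =
		    (ifr6.ifr_ifru.ifru_flags6 & IN6_IFF_TENTATIVE) != 0;
	close(s);
	return (tentative);
}
#endif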
      
      #ifdef _KERNEL
      #define IN6_ARE_MASKED_ADDR_EQUAL(d, a, m)        (        \
              (((d)->s6_addr32[0] ^ (a)->s6_addr32[0]) & (m)->s6_addr32[0]) == 0 && \
              (((d)->s6_addr32[1] ^ (a)->s6_addr32[1]) & (m)->s6_addr32[1]) == 0 && \
              (((d)->s6_addr32[2] ^ (a)->s6_addr32[2]) & (m)->s6_addr32[2]) == 0 && \
              (((d)->s6_addr32[3] ^ (a)->s6_addr32[3]) & (m)->s6_addr32[3]) == 0 )
      
      #define IN6_ARE_SCOPE_CMP(a,b) ((a)-(b))
      #define IN6_ARE_SCOPE_EQUAL(a,b) ((a)==(b))
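
/*
 * Usage sketch: the standard on-link test -- does dst fall within the
 * prefix configured on ia?  Built purely from the macros above.
 */
static __inline int
example_prefix_match(struct in6_ifaddr *ia, struct in6_addr *dst)
{
	return (IN6_ARE_MASKED_ADDR_EQUAL(dst, IA6_IN6(ia),
	    IA6_MASKIN6(ia)));
}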
      
      /*
       * Multi-cast membership entry.  One for each group/ifp that a PCB
       * belongs to.
       */
      struct in6_multi_mship {
              struct        in6_multi *i6mm_maddr;        /* Multicast address pointer */
              LIST_ENTRY(in6_multi_mship) i6mm_chain;  /* multicast options chain */
      };
      
      struct in6_multi {
              struct ifmaddr                in6m_ifma;   /* Protocol-independent info */
      #define in6m_refcnt                in6m_ifma.ifma_refcnt
      #define in6m_ifidx                in6m_ifma.ifma_ifidx
      
              struct sockaddr_in6        in6m_sin;   /* IPv6 multicast address */
      #define in6m_addr                in6m_sin.sin6_addr
      
              u_int                        in6m_state; /* state of membership */
              u_int                        in6m_timer; /* MLD6 membership report timer */
      };
      
      static __inline struct in6_multi *
      ifmatoin6m(struct ifmaddr *ifma)
      {
	return ((struct in6_multi *)(ifma));
      }
      
      /*
       * Macros for looking up the in6_multi record for a given IP6 multicast
       * address on a given interface. If no matching record is found, "in6m"
       * returns NULL.
       */
      #define IN6_LOOKUP_MULTI(addr, ifp, in6m)                                \
              /* struct in6_addr addr; */                                        \
              /* struct ifnet *ifp; */                                        \
              /* struct in6_multi *in6m; */                                        \
      do {                                                                        \
              struct ifmaddr *ifma;                                                \
                                                                              \
              (in6m) = NULL;                                                        \
              TAILQ_FOREACH(ifma, &(ifp)->if_maddrlist, ifma_list)                \
                      if (ifma->ifma_addr->sa_family == AF_INET6 &&                \
                          IN6_ARE_ADDR_EQUAL(&ifmatoin6m(ifma)->in6m_addr,        \
                                             &(addr))) {                        \
                              (in6m) = ifmatoin6m(ifma);                        \
                              break;                                                \
                      }                                                        \
      } while (/* CONSTCOND */ 0)
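
/*
 * Usage sketch (hypothetical input-path check): has this interface
 * joined the destination group?  The macro leaves NULL in in6m when no
 * record matches.
 */
static __inline int
example_in6_is_member(struct ifnet *ifp, struct in6_addr *dst)
{
	struct in6_multi *in6m;

	IN6_LOOKUP_MULTI(*dst, ifp, in6m);
	return (in6m != NULL);
}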
      
      struct        in6_multi *in6_addmulti(struct in6_addr *, struct ifnet *, int *);
      void        in6_delmulti(struct in6_multi *);
      int        in6_hasmulti(struct in6_addr *, struct ifnet *);
      struct in6_multi_mship *in6_joingroup(struct ifnet *, struct in6_addr *, int *);
      void        in6_leavegroup(struct in6_multi_mship *);
      int        in6_control(struct socket *, u_long, caddr_t, struct ifnet *);
      int        in6_ioctl(u_long, caddr_t, struct ifnet *, int);
      int        in6_update_ifa(struct ifnet *, struct in6_aliasreq *,
              struct in6_ifaddr *);
      void        in6_purgeaddr(struct ifaddr *);
      int        in6if_do_dad(struct ifnet *);
      void        *in6_domifattach(struct ifnet *);
void	in6_domifdetach(struct ifnet *, void *);