/*        $OpenBSD: ch.c,v 1.53 2019/01/20 20:28:37 krw Exp $        */
      /*        $NetBSD: ch.c,v 1.26 1997/02/21 22:06:52 thorpej Exp $        */
      
      /*
       * Copyright (c) 1996, 1997 Jason R. Thorpe <thorpej@and.com>
       * All rights reserved.
       *
       * Partially based on an autochanger driver written by Stefan Grefen
       * and on an autochanger driver written by the Systems Programming Group
       * at the University of Utah Computer Science Department.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       * 3. All advertising materials mentioning features or use of this software
       *    must display the following acknowledgements:
       *        This product includes software developed by Jason R. Thorpe
       *        for And Communications, http://www.and.com/
       * 4. The name of the author may not be used to endorse or promote products
       *    derived from this software without specific prior written permission.
       *
       * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
       * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
       * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
       * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
       * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
       * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
       * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
       * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
       * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
       * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
       * SUCH DAMAGE.
       */
      
      #include <sys/param.h>
      #include <sys/systm.h>
      #include <sys/errno.h>
      #include <sys/ioctl.h>
      #include <sys/chio.h>
      #include <sys/device.h>
      #include <sys/malloc.h>
      #include <sys/pool.h>
      #include <sys/conf.h>
      #include <sys/fcntl.h>
      
      #include <scsi/scsi_all.h>
      #include <scsi/scsi_changer.h>
      #include <scsi/scsiconf.h>
      
      #define CHRETRIES        2
      #define CHUNIT(x)        (minor((x)))
      
      struct ch_softc {
              struct device        sc_dev;                /* generic device info */
              struct scsi_link *sc_link;        /* link in the SCSI bus */
      
              int                sc_picker;        /* current picker */
      
              /*
               * The following information is obtained from the
               * element address assignment page.
               */
              int                sc_firsts[4];        /* firsts, indexed by CHET_* */
              int                sc_counts[4];        /* counts, indexed by CHET_* */
      
              /*
               * The following mask defines the legal combinations
               * of elements for the MOVE MEDIUM command.
               */
              u_int8_t        sc_movemask[4];
      
              /*
               * As above, but for EXCHANGE MEDIUM.
               */
              u_int8_t        sc_exchangemask[4];
      
              int                flags;                /* misc. info */
      
              /*
               * Quirks; see below.
               */
              int                sc_settledelay; /* delay for settle */
      
      };
      
      /* sc_flags */
      #define CHF_ROTATE        0x01                /* picker can rotate */
      
      /* Autoconfiguration glue */
      int        chmatch(struct device *, void *, void *);
      void        chattach(struct device *, struct device *, void *);
      
      struct cfattach ch_ca = {
              sizeof(struct ch_softc), chmatch, chattach
      };
      
      struct cfdriver ch_cd = {
              NULL, "ch", DV_DULL
      };
      
      const struct scsi_inquiry_pattern ch_patterns[] = {
              {T_CHANGER, T_REMOV,
               "",                "",                ""},
      };
      
      int        ch_move(struct ch_softc *, struct changer_move *);
      int        ch_exchange(struct ch_softc *, struct changer_exchange *);
      int        ch_position(struct ch_softc *, struct changer_position *);
      int        ch_usergetelemstatus(struct ch_softc *,
          struct changer_element_status_request *);
      int        ch_getelemstatus(struct ch_softc *, int, int, caddr_t, size_t, int);
      int        ch_get_params(struct ch_softc *, int);
      int        ch_interpret_sense(struct scsi_xfer *xs);
      void        ch_get_quirks(struct ch_softc *, struct scsi_inquiry_data *);
      
      /*
       * SCSI changer quirks.
       */
      struct chquirk {
              struct        scsi_inquiry_pattern cq_match; /* device id pattern */
              int        cq_settledelay;        /* settle delay, in seconds */
      };
      
      struct chquirk chquirks[] = {
              {{T_CHANGER, T_REMOV,
                "SPECTRA",        "9000",                "0200"},
               75},
      };
      
      int
      chmatch(struct device *parent, void *match, void *aux)
      {
              struct scsi_attach_args *sa = aux;
              int priority;
      
              (void)scsi_inqmatch(sa->sa_inqbuf,
                  ch_patterns, nitems(ch_patterns),
                  sizeof(ch_patterns[0]), &priority);
      
              return (priority);
      }
      
      void
      chattach(struct device *parent, struct device *self, void *aux)
      {
              struct ch_softc *sc = (struct ch_softc *)self;
              struct scsi_attach_args *sa = aux;
              struct scsi_link *link = sa->sa_sc_link;
      
              /* Glue into the SCSI bus */
              sc->sc_link = link;
              link->interpret_sense = ch_interpret_sense;
              link->device_softc = sc;
              link->openings = 1;
      
              printf("\n");
      
              /*
	 * Store our device's quirks.
               */
              ch_get_quirks(sc, sa->sa_inqbuf);
      
      }
      
      int
      chopen(dev_t dev, int flags, int fmt, struct proc *p)
{
              struct ch_softc *sc;
              int oldcounts[4];
              int i, unit, error = 0;
      
              unit = CHUNIT(dev);
	if ((unit >= ch_cd.cd_ndevs) ||
                  ((sc = ch_cd.cd_devs[unit]) == NULL))
                      return (ENXIO);
      
              /*
               * Only allow one open at a time.
               */
              if (sc->sc_link->flags & SDEV_OPEN)
                      return (EBUSY);
      
              sc->sc_link->flags |= SDEV_OPEN;
      
              /*
               * Absorb any unit attention errors. We must notice
               * "Not ready" errors as a changer will report "In the
               * process of getting ready" any time it must rescan
               * itself to determine the state of the changer.
               */
              error = scsi_test_unit_ready(sc->sc_link, TEST_READY_RETRIES,
                  SCSI_IGNORE_ILLEGAL_REQUEST | SCSI_IGNORE_MEDIA_CHANGE);
              if (error)
                      goto bad;
      
              /*
               * Get information about the device. Save old information
               * so we can decide whether to be verbose about new parameters.
               */
              for (i = 0; i < 4; i++) {
                      oldcounts[i] = sc->sc_counts[i];
              }
              error = ch_get_params(sc, scsi_autoconf);
              if (error)
                      goto bad;
      
              for (i = 0; i < 4; i++) {
                      if (oldcounts[i] != sc->sc_counts[i]) {
                              break;
                      }
              }
              if (i < 4) {
      #ifdef CHANGER_DEBUG
      #define PLURAL(c)        (c) == 1 ? "" : "s"
                      printf("%s: %d slot%s, %d drive%s, %d picker%s, %d portal%s\n",
                          sc->sc_dev.dv_xname,
                          sc->sc_counts[CHET_ST], PLURAL(sc->sc_counts[CHET_ST]),
                          sc->sc_counts[CHET_DT], PLURAL(sc->sc_counts[CHET_DT]),
                          sc->sc_counts[CHET_MT], PLURAL(sc->sc_counts[CHET_MT]),
                          sc->sc_counts[CHET_IE], PLURAL(sc->sc_counts[CHET_IE]));
      #undef PLURAL
                      printf("%s: move mask: 0x%x 0x%x 0x%x 0x%x\n",
                          sc->sc_dev.dv_xname,
                          sc->sc_movemask[CHET_MT], sc->sc_movemask[CHET_ST],
                          sc->sc_movemask[CHET_IE], sc->sc_movemask[CHET_DT]);
                      printf("%s: exchange mask: 0x%x 0x%x 0x%x 0x%x\n",
                          sc->sc_dev.dv_xname,
                          sc->sc_exchangemask[CHET_MT], sc->sc_exchangemask[CHET_ST],
                          sc->sc_exchangemask[CHET_IE], sc->sc_exchangemask[CHET_DT]);
      #endif /* CHANGER_DEBUG */
              }
      
              /* Default the current picker. */
              sc->sc_picker = sc->sc_firsts[CHET_MT];
      
              return (0);
      
      bad:
              sc->sc_link->flags &= ~SDEV_OPEN;
              return (error);
      }
      
      int
      chclose(dev_t dev, int flags, int fmt, struct proc *p)
      {
              struct ch_softc *sc = ch_cd.cd_devs[CHUNIT(dev)];
      
              sc->sc_link->flags &= ~SDEV_OPEN;
              return (0);
      }
      
      int
      chioctl(dev_t dev, u_long cmd, caddr_t data, int flags, struct proc *p)
      {
              struct ch_softc *sc = ch_cd.cd_devs[CHUNIT(dev)];
              int error = 0;
      
              /*
               * If this command can change the device's state, we must
               * have the device open for writing.
               */
              switch (cmd) {
              case CHIOGPICKER:
              case CHIOGPARAMS:
              case CHIOGSTATUS:
                      break;
      
              default:
                      if ((flags & FWRITE) == 0)
                              return (EBADF);
              }
      
              switch (cmd) {
              case CHIOMOVE:
                      error = ch_move(sc, (struct changer_move *)data);
                      break;
      
              case CHIOEXCHANGE:
                      error = ch_exchange(sc, (struct changer_exchange *)data);
                      break;
      
              case CHIOPOSITION:
                      error = ch_position(sc, (struct changer_position *)data);
                      break;
      
              case CHIOGPICKER:
                      *(int *)data = sc->sc_picker - sc->sc_firsts[CHET_MT];
                      break;
      
              case CHIOSPICKER:        {
                      int new_picker = *(int *)data;
      
                      if (new_picker > (sc->sc_counts[CHET_MT] - 1))
                              return (EINVAL);
                      sc->sc_picker = sc->sc_firsts[CHET_MT] + new_picker;
                      break;                }
      
              case CHIOGPARAMS:        {
                      struct changer_params *cp = (struct changer_params *)data;
      
                      cp->cp_curpicker = sc->sc_picker - sc->sc_firsts[CHET_MT];
                      cp->cp_npickers = sc->sc_counts[CHET_MT];
                      cp->cp_nslots = sc->sc_counts[CHET_ST];
                      cp->cp_nportals = sc->sc_counts[CHET_IE];
                      cp->cp_ndrives = sc->sc_counts[CHET_DT];
                      break;                }
      
              case CHIOGSTATUS:        {
                      struct changer_element_status_request *cesr =
                          (struct changer_element_status_request *)data;
      
                      error = ch_usergetelemstatus(sc, cesr);
                      break;                }
      
              /* Implement prevent/allow? */
      
              default:
                      error = scsi_do_ioctl(sc->sc_link, cmd, data, flags);
                      break;
              }
      
              return (error);
      }
      
      int
      ch_move(struct ch_softc *sc, struct changer_move *cm)
      {
              struct scsi_move_medium *cmd;
              struct scsi_xfer *xs;
              int error;
              u_int16_t fromelem, toelem;
      
              /*
               * Check arguments.
               */
              if ((cm->cm_fromtype > CHET_DT) || (cm->cm_totype > CHET_DT))
                      return (EINVAL);
              if ((cm->cm_fromunit > (sc->sc_counts[cm->cm_fromtype] - 1)) ||
                  (cm->cm_tounit > (sc->sc_counts[cm->cm_totype] - 1)))
                      return (ENODEV);
      
              /*
               * Check the request against the changer's capabilities.
               */
              if ((sc->sc_movemask[cm->cm_fromtype] & (1 << cm->cm_totype)) == 0)
                      return (EINVAL);
      
              /*
               * Calculate the source and destination elements.
               */
              fromelem = sc->sc_firsts[cm->cm_fromtype] + cm->cm_fromunit;
              toelem = sc->sc_firsts[cm->cm_totype] + cm->cm_tounit;
      
              /*
               * Build the SCSI command.
               */
              xs = scsi_xs_get(sc->sc_link, 0);
              if (xs == NULL)
                      return (ENOMEM);
              xs->cmdlen = sizeof(*cmd);
              xs->retries = CHRETRIES;
              xs->timeout = 100000;
      
              cmd = (struct scsi_move_medium *)xs->cmd;
              cmd->opcode = MOVE_MEDIUM;
              _lto2b(sc->sc_picker, cmd->tea);
              _lto2b(fromelem, cmd->src);
              _lto2b(toelem, cmd->dst);
              if (cm->cm_flags & CM_INVERT)
                      cmd->flags |= MOVE_MEDIUM_INVERT;
      
              error = scsi_xs_sync(xs);
              scsi_xs_put(xs);
      
              return (error);
      }
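
/*
 * Note on the _lto2b()/_2btol() helpers used in the CDB setup above:
 * SCSI CDB fields are big-endian, so _lto2b(0x1234, cmd->src) stores
 * 0x12 then 0x34 regardless of host byte order, and _2btol() is the
 * inverse. An equivalent sketch (illustrative only; the real helpers
 * are provided by the SCSI midlayer headers):
 */
#if 0
static void
ex_lto2b(u_int32_t val, u_int8_t *bytes)
{
	bytes[0] = (val >> 8) & 0xff;	/* most significant byte first */
	bytes[1] = val & 0xff;
}

static u_int32_t
ex_2btol(const u_int8_t *bytes)
{
	return ((u_int32_t)bytes[0] << 8) | bytes[1];
}
#endif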
      
      int
      ch_exchange(struct ch_softc *sc, struct changer_exchange *ce)
      {
              struct scsi_exchange_medium *cmd;
              struct scsi_xfer *xs;
              int error;
              u_int16_t src, dst1, dst2;
      
              /*
               * Check arguments.
               */
              if ((ce->ce_srctype > CHET_DT) || (ce->ce_fdsttype > CHET_DT) ||
                  (ce->ce_sdsttype > CHET_DT))
                      return (EINVAL);
              if ((ce->ce_srcunit > (sc->sc_counts[ce->ce_srctype] - 1)) ||
                  (ce->ce_fdstunit > (sc->sc_counts[ce->ce_fdsttype] - 1)) ||
                  (ce->ce_sdstunit > (sc->sc_counts[ce->ce_sdsttype] - 1)))
                      return (ENODEV);
      
              /*
               * Check the request against the changer's capabilities.
               */
              if (((sc->sc_exchangemask[ce->ce_srctype] &
                  (1 << ce->ce_fdsttype)) == 0) ||
                  ((sc->sc_exchangemask[ce->ce_fdsttype] &
                  (1 << ce->ce_sdsttype)) == 0))
                      return (EINVAL);
      
              /*
               * Calculate the source and destination elements.
               */
              src = sc->sc_firsts[ce->ce_srctype] + ce->ce_srcunit;
              dst1 = sc->sc_firsts[ce->ce_fdsttype] + ce->ce_fdstunit;
              dst2 = sc->sc_firsts[ce->ce_sdsttype] + ce->ce_sdstunit;
      
              /*
               * Build the SCSI command.
               */
              xs = scsi_xs_get(sc->sc_link, 0);
              if (xs == NULL)
                      return (ENOMEM);
              xs->cmdlen = sizeof(*cmd);
              xs->retries = CHRETRIES;
              xs->timeout = 100000;
      
              cmd = (struct scsi_exchange_medium *)xs->cmd;
              cmd->opcode = EXCHANGE_MEDIUM;
              _lto2b(sc->sc_picker, cmd->tea);
              _lto2b(src, cmd->src);
              _lto2b(dst1, cmd->fdst);
              _lto2b(dst2, cmd->sdst);
              if (ce->ce_flags & CE_INVERT1)
                      cmd->flags |= EXCHANGE_MEDIUM_INV1;
              if (ce->ce_flags & CE_INVERT2)
                      cmd->flags |= EXCHANGE_MEDIUM_INV2;
      
              error = scsi_xs_sync(xs);
              scsi_xs_put(xs);
      
              return (error);
      }
      
      int
      ch_position(struct ch_softc *sc, struct changer_position *cp)
      {
              struct scsi_position_to_element *cmd;
              struct scsi_xfer *xs;
              int error;
              u_int16_t dst;
      
              /*
               * Check arguments.
               */
              if (cp->cp_type > CHET_DT)
                      return (EINVAL);
              if (cp->cp_unit > (sc->sc_counts[cp->cp_type] - 1))
                      return (ENODEV);
      
              /*
               * Calculate the destination element.
               */
              dst = sc->sc_firsts[cp->cp_type] + cp->cp_unit;
      
              /*
               * Build the SCSI command.
               */
              xs = scsi_xs_get(sc->sc_link, 0);
              if (xs == NULL)
                      return (ENOMEM);
              xs->cmdlen = sizeof(*cmd);
              xs->retries = CHRETRIES;
              xs->timeout = 100000;
      
              cmd = (struct scsi_position_to_element *)xs->cmd;
              cmd->opcode = POSITION_TO_ELEMENT;
              _lto2b(sc->sc_picker, cmd->tea);
              _lto2b(dst, cmd->dst);
              if (cp->cp_flags & CP_INVERT)
                      cmd->flags |= POSITION_TO_ELEMENT_INVERT;
      
              error = scsi_xs_sync(xs);
              scsi_xs_put(xs);
      
              return (error);
      }
      
      /*
       * Copy a volume tag to a volume_tag struct, converting SCSI byte order
       * to host native byte order in the volume serial number.  The volume
 * label as returned by the changer is transferred to user mode as a
       * nul-terminated string.  Volume labels are truncated at the first
       * space, as suggested by SCSI-2.
       */
      static  void
      copy_voltag(struct changer_voltag *uvoltag, struct volume_tag *voltag)
      {
              int i;
      
              for (i=0; i<CH_VOLTAG_MAXLEN; i++) {
                      char c = voltag->vif[i];
                      if (c && c != ' ')
                              uvoltag->cv_volid[i] = c;
                      else
                              break;
              }
              uvoltag->cv_volid[i] = '\0';
              uvoltag->cv_serial = _2btol(voltag->vsn);
      }
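
/*
 * Example (illustrative): a changer-reported tag of "ABC123 L4" with
 * a vsn of { 0x00, 0x02 } copies out as cv_volid = "ABC123" (the tag
 * is truncated at the first space, as noted above) and cv_serial = 2.
 */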
      
      /*
 * Copy an element status descriptor to a user-mode
       * changer_element_status structure.
       */
      static void
      copy_element_status(int flags,        struct read_element_status_descriptor *desc,
          struct changer_element_status *ces)
      {
              ces->ces_flags = desc->flags1;
      
              if (flags & READ_ELEMENT_STATUS_PVOLTAG)
                      copy_voltag(&ces->ces_pvoltag, &desc->pvoltag);
              if (flags & READ_ELEMENT_STATUS_AVOLTAG)
                      copy_voltag(&ces->ces_avoltag, &desc->avoltag);
      }
      
      /*
       * Perform a READ ELEMENT STATUS on behalf of the user, and return to
 * the user only the data the user is interested in (i.e., an array of
 * changer_element_status structures).
       */
      int
      ch_usergetelemstatus(struct ch_softc *sc,
          struct changer_element_status_request *cesr)
      {
              struct changer_element_status *user_data = NULL;
              struct read_element_status_header *st_hdr;
              struct read_element_status_page_header *pg_hdr;
              caddr_t desc;
              caddr_t data = NULL;
              size_t size, desclen, udsize;
              int chet = cesr->cesr_type;
              int avail, i, error = 0;
              int want_voltags = (cesr->cesr_flags & CESR_VOLTAGS) ? 1 : 0;
      
              /*
               * If there are no elements of the requested type in the changer,
               * the request is invalid.
               */
              if (sc->sc_counts[chet] == 0)
                      return (EINVAL);
      
              /*
               * Request one descriptor for the given element type.  This
               * is used to determine the size of the descriptor so that
               * we can allocate enough storage for all of them.  We assume
               * that the first one can fit into 1k.
               */
              size = 1024;
              data = dma_alloc(size, PR_WAITOK);
              error = ch_getelemstatus(sc, sc->sc_firsts[chet], 1, data, size,
                  want_voltags);
              if (error)
                      goto done;
      
              st_hdr = (struct read_element_status_header *)data;
              pg_hdr = (struct read_element_status_page_header *) (st_hdr + 1);
              desclen = _2btol(pg_hdr->edl);
      
              dma_free(data, size);
      
              /*
               * Reallocate storage for descriptors and get them from the
               * device.
               */
              size = sizeof(struct read_element_status_header) +
                  sizeof(struct read_element_status_page_header) +
                  (desclen * sc->sc_counts[chet]);
              data = dma_alloc(size, PR_WAITOK);
              error = ch_getelemstatus(sc, sc->sc_firsts[chet],
                  sc->sc_counts[chet], data, size, want_voltags);
              if (error)
                      goto done;
      
              /*
               * Fill in the user status array.
               */
              st_hdr = (struct read_element_status_header *)data;
              pg_hdr = (struct read_element_status_page_header *) (st_hdr + 1);
      
              avail = _2btol(st_hdr->count);
              if (avail != sc->sc_counts[chet]) {
                      error = EINVAL;
                      goto done;
              }
      
              user_data = mallocarray(avail, sizeof(struct changer_element_status),
                  M_DEVBUF, M_WAITOK | M_ZERO);
              udsize = avail * sizeof(struct changer_element_status);
      
              desc = (caddr_t)(pg_hdr + 1);
              for (i = 0; i < avail; ++i) {
                      struct changer_element_status *ces = &(user_data[i]);
                      copy_element_status(pg_hdr->flags,
                          (struct read_element_status_descriptor *)desc, ces);
                      desc += desclen;
              }
      
              /* Copy array out to userspace. */
              error = copyout(user_data, cesr->cesr_data, udsize);
      
      done:
              if (data != NULL)
                      dma_free(data, size);
              if (user_data != NULL)
                      free(user_data, M_DEVBUF, udsize);
              return (error);
      }
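
/*
 * Worked example of the two-pass sizing above (figures illustrative):
 * if the probe request reports an element descriptor length (edl) of
 * 52 bytes and the changer has 30 storage slots, the second request
 * allocates
 *	sizeof(status header) + sizeof(page header) + 52 * 30
 * bytes, i.e. 8 + 8 + 1560 = 1576 bytes with the 8-byte SCSI-2
 * headers, and READ ELEMENT STATUS then returns all 30 descriptors
 * in a single transfer.
 */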
      
      int
      ch_getelemstatus(struct ch_softc *sc, int first, int count, caddr_t data,
          size_t datalen, int voltag)
      {
              struct scsi_read_element_status *cmd;
              struct scsi_xfer *xs;
              int error;
      
              /*
               * Build SCSI command.
               */
              xs = scsi_xs_get(sc->sc_link, SCSI_DATA_IN);
              if (xs == NULL)
                      return (ENOMEM);
              xs->cmdlen = sizeof(*cmd);
              xs->data = data;
              xs->datalen = datalen;
              xs->retries = CHRETRIES;
              xs->timeout = 100000;
      
              cmd = (struct scsi_read_element_status *)xs->cmd;
              cmd->opcode = READ_ELEMENT_STATUS;
              _lto2b(first, cmd->sea);
              _lto2b(count, cmd->count);
              _lto3b(datalen, cmd->len);
              if (voltag)
                      cmd->byte2 |= READ_ELEMENT_STATUS_VOLTAG;
      
              error = scsi_xs_sync(xs);
              scsi_xs_put(xs);
      
              return (error);
      }
      
      /*
       * Ask the device about itself and fill in the parameters in our
       * softc.
       */
      int
      ch_get_params(struct ch_softc *sc, int flags)
      {
              union scsi_mode_sense_buf *data;
              struct page_element_address_assignment *ea;
              struct page_device_capabilities *cap;
              int error, from;
              u_int8_t *moves, *exchanges;
      
              data = dma_alloc(sizeof(*data), PR_NOWAIT);
              if (data == NULL)
                      return (ENOMEM);
      
              /*
               * Grab info from the element address assignment page (0x1d).
               */
              error = scsi_do_mode_sense(sc->sc_link, 0x1d, data,
                  (void **)&ea, NULL, NULL, NULL, sizeof(*ea), flags, NULL);
              if (error == 0 && ea == NULL)
                      error = EIO;
              if (error != 0) {
      #ifdef CHANGER_DEBUG
                      printf("%s: could not sense element address page\n",
                          sc->sc_dev.dv_xname);
      #endif
                      dma_free(data, sizeof(*data));
                      return (error);
              }
      
              sc->sc_firsts[CHET_MT] = _2btol(ea->mtea);
              sc->sc_counts[CHET_MT] = _2btol(ea->nmte);
              sc->sc_firsts[CHET_ST] = _2btol(ea->fsea);
              sc->sc_counts[CHET_ST] = _2btol(ea->nse);
              sc->sc_firsts[CHET_IE] = _2btol(ea->fieea);
              sc->sc_counts[CHET_IE] = _2btol(ea->niee);
              sc->sc_firsts[CHET_DT] = _2btol(ea->fdtea);
              sc->sc_counts[CHET_DT] = _2btol(ea->ndte);
      
              /* XXX Ask for transport geometry page. */
      
              /*
               * Grab info from the capabilities page (0x1f).
               */
              error = scsi_do_mode_sense(sc->sc_link, 0x1f, data,
                  (void **)&cap, NULL, NULL, NULL, sizeof(*cap), flags, NULL);
	if (error == 0 && cap == NULL)
                      error = EIO;
              if (error != 0) {
      #ifdef CHANGER_DEBUG
                      printf("%s: could not sense capabilities page\n",
                          sc->sc_dev.dv_xname);
      #endif
                      dma_free(data, sizeof(*data));
                      return (error);
              }
      
              bzero(sc->sc_movemask, sizeof(sc->sc_movemask));
              bzero(sc->sc_exchangemask, sizeof(sc->sc_exchangemask));
              moves = &cap->move_from_mt;
              exchanges = &cap->exchange_with_mt;
              for (from = CHET_MT; from <= CHET_DT; ++from) {
                      sc->sc_movemask[from] = moves[from];
                      sc->sc_exchangemask[from] = exchanges[from];
              }
      
              sc->sc_link->flags |= SDEV_MEDIA_LOADED;
              dma_free(data, sizeof(*data));
              return (0);
      }
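
/*
 * Example of the capability masks filled in above (illustrative): if
 * the device reports move_from_st = 0x0a (binary 1010), a medium in a
 * storage slot (CHET_ST) may move to element types whose CHET_* bit
 * is set: bit 1 (CHET_ST, slot to slot) and bit 3 (CHET_DT, slot to
 * drive), but not to a picker or portal. ch_move() checks exactly
 * this with (sc_movemask[fromtype] & (1 << totype)).
 */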
      
      void
      ch_get_quirks(struct ch_softc *sc, struct scsi_inquiry_data *inqbuf)
      {
              const struct chquirk *match;
              int priority;
      
              sc->sc_settledelay = 0;
      
              match = (const struct chquirk *)scsi_inqmatch(inqbuf,
                  (caddr_t)chquirks,
                  sizeof(chquirks) / sizeof(chquirks[0]),
                  sizeof(chquirks[0]), &priority);
              if (priority != 0) {
                      sc->sc_settledelay = match->cq_settledelay;
              }
      }
      
/*
 * Look at the returned sense and act on the error to determine
 * the unix error number to pass back (0 = report no error,
 * -1 = continue processing).
 */
      int
      ch_interpret_sense(struct scsi_xfer *xs)
      {
              struct scsi_sense_data *sense = &xs->sense;
              struct scsi_link *link = xs->sc_link;
              u_int8_t serr = sense->error_code & SSD_ERRCODE;
              u_int8_t skey = sense->flags & SSD_KEY;
      
              if (((link->flags & SDEV_OPEN) == 0) ||
                  (serr != SSD_ERRCODE_CURRENT && serr != SSD_ERRCODE_DEFERRED))
                      return (scsi_interpret_sense(xs));
      
              switch (skey) {
      
	/*
	 * We do custom processing in ch for the "Unit Becoming Ready"
	 * case: for that case alone, xs->retries is not decremented.
	 * This is because changers report "Unit Becoming Ready" while
	 * they rescan their state (e.g. when the door was opened),
	 * which can take a long time on large units. Rather than impose
	 * a massive timeout on all operations (which would cause other
	 * problems), we let changers wait forever (interruptible with
	 * Ctrl-C) as long as they keep reporting that they are becoming
	 * ready. All other cases are handled as per the default.
	 */
              case SKEY_NOT_READY:
                      if ((xs->flags & SCSI_IGNORE_NOT_READY) != 0)
                              return (0);
                      switch (ASC_ASCQ(sense)) {
                      case SENSE_NOT_READY_BECOMING_READY:
                              SC_DEBUG(link, SDEV_DB1, ("not ready: busy (%#x)\n",
                                  sense->add_sense_code_qual));
                              /* don't count this as a retry */
                              xs->retries++;
                              return (scsi_delay(xs, 1));
                      default:
                              return (scsi_interpret_sense(xs));
		}
              default:
                      return (scsi_interpret_sense(xs));
              }
      }
      /*        $OpenBSD: cd.c,v 1.225 2019/08/17 15:31:41 krw Exp $        */
      /*        $NetBSD: cd.c,v 1.100 1997/04/02 02:29:30 mycroft Exp $        */
      
      /*
       * Copyright (c) 1994, 1995, 1997 Charles M. Hannum.  All rights reserved.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       * 3. All advertising materials mentioning features or use of this software
       *    must display the following acknowledgement:
       *        This product includes software developed by Charles M. Hannum.
       * 4. The name of the author may not be used to endorse or promote products
       *    derived from this software without specific prior written permission.
       *
       * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
       * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
       * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
       * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
       * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
       * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
       * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
       * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
       * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
       * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
       */
      
      /*
       * Originally written by Julian Elischer (julian@tfs.com)
       * for TRW Financial Systems for use under the MACH(2.5) operating system.
       *
       * TRW Financial Systems, in accordance with their agreement with Carnegie
       * Mellon University, makes this software available to CMU to distribute
       * or use in any manner that they see fit as long as this message is kept with
       * the software. For this reason TFS also grants any other persons or
       * organisations permission to use or modify this software.
       *
       * TFS supplies this software to be publicly redistributed
       * on the understanding that TFS is not responsible for the correct
       * functioning of this software in any circumstances.
       *
       * Ported to run under 386BSD by Julian Elischer (julian@tfs.com) Sept 1992
       */
      
      #include <sys/param.h>
      #include <sys/systm.h>
      #include <sys/timeout.h>
      #include <sys/fcntl.h>
      #include <sys/stat.h>
      #include <sys/ioctl.h>
      #include <sys/mtio.h>
      #include <sys/buf.h>
      #include <sys/uio.h>
      #include <sys/malloc.h>
      #include <sys/pool.h>
      #include <sys/errno.h>
      #include <sys/device.h>
      #include <sys/disklabel.h>
      #include <sys/disk.h>
      #include <sys/cdio.h>
      #include <sys/conf.h>
      #include <sys/scsiio.h>
      #include <sys/dkio.h>
      #include <sys/vnode.h>
      
      #include <scsi/scsi_all.h>
      #include <scsi/cd.h>
      #include <scsi/scsi_disk.h>        /* rw_big and start_stop come from there */
      #include <scsi/scsiconf.h>
      
      
      #include <ufs/ffs/fs.h>                /* for BBSIZE and SBSIZE */
      
      #define        CDOUTSTANDING        4
      
      #define MAXTRACK        99
      #define CD_FRAMES        75
      #define CD_SECS                60
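
/*
 * CD audio addressing uses minute/second/frame (MSF) form, with
 * CD_SECS (60) seconds per minute and CD_FRAMES (75) frames per
 * second; logical block 0 corresponds to MSF 00:02:00 because of the
 * 150-frame (2-second) lead-in offset. A conversion sketch
 * (illustrative only, not used by this driver):
 */
#if 0
static void
ex_lba2msf(u_int32_t lba, u_int8_t *m, u_int8_t *s, u_int8_t *f)
{
	lba += 2 * CD_FRAMES;			/* 2-second lead-in offset */
	*m = lba / (CD_SECS * CD_FRAMES);	/* minutes */
	*s = (lba % (CD_SECS * CD_FRAMES)) / CD_FRAMES;	/* seconds */
	*f = lba % CD_FRAMES;			/* frames */
}
#endif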
      
      struct cd_toc {
              struct ioc_toc_header header;
              struct cd_toc_entry entries[MAXTRACK+1]; /* One extra for the */
                                                       /* leadout */
      };
      
      int        cdmatch(struct device *, void *, void *);
      void        cdattach(struct device *, struct device *, void *);
      int        cdactivate(struct device *, int);
      int        cddetach(struct device *, int);
      
      struct cd_softc {
              struct device sc_dev;
              struct disk sc_dk;
      
              int sc_flags;
      #define        CDF_ANCIENT        0x10                /* disk is ancient; for minphys */
      #define        CDF_DYING        0x40                /* dying, when deactivated */
      #define CDF_WAITING        0x100
              struct scsi_link *sc_link;        /* contains our targ, lun, etc. */
              struct cd_parms {
                      u_int32_t secsize;
                      u_int64_t disksize;        /* total number sectors */
              } params;
              struct bufq        sc_bufq;
              struct scsi_xshandler sc_xsh;
              struct timeout sc_timeout;
      };
      
      void        cdstart(struct scsi_xfer *);
      void        cd_buf_done(struct scsi_xfer *);
      void        cdminphys(struct buf *);
      int        cdgetdisklabel(dev_t, struct cd_softc *, struct disklabel *, int);
      int        cd_setchan(struct cd_softc *, int, int, int, int, int);
      int        cd_getvol(struct cd_softc *cd, struct ioc_vol *, int);
      int        cd_setvol(struct cd_softc *, const struct ioc_vol *, int);
      int        cd_load_unload(struct cd_softc *, int, int);
      int        cd_set_pa_immed(struct cd_softc *, int);
      int        cd_play(struct cd_softc *, int, int);
      int        cd_play_tracks(struct cd_softc *, int, int, int, int);
      int        cd_play_msf(struct cd_softc *, int, int, int, int, int, int);
      int        cd_pause(struct cd_softc *, int);
      int        cd_reset(struct cd_softc *);
      int        cd_read_subchannel(struct cd_softc *, int, int, int,
                  struct cd_sub_channel_info *, int );
      int        cd_read_toc(struct cd_softc *, int, int, void *, int, int);
      int        cd_get_parms(struct cd_softc *, int);
      int        cd_load_toc(struct cd_softc *, struct cd_toc *, int);
      int        cd_interpret_sense(struct scsi_xfer *);
      u_int64_t cd_size(struct scsi_link *, int, u_int32_t *);
      
      int        dvd_auth(struct cd_softc *, union dvd_authinfo *);
      int        dvd_read_physical(struct cd_softc *, union dvd_struct *);
      int        dvd_read_copyright(struct cd_softc *, union dvd_struct *);
      int        dvd_read_disckey(struct cd_softc *, union dvd_struct *);
      int        dvd_read_bca(struct cd_softc *, union dvd_struct *);
      int        dvd_read_manufact(struct cd_softc *, union dvd_struct *);
      int        dvd_read_struct(struct cd_softc *, union dvd_struct *);
      
      #if defined(__macppc__)
      int        cd_eject(void);
      #endif
      
      struct cfattach cd_ca = {
              sizeof(struct cd_softc), cdmatch, cdattach,
              cddetach, cdactivate
      };
      
      struct cfdriver cd_cd = {
              NULL, "cd", DV_DISK
      };
      
      const struct scsi_inquiry_pattern cd_patterns[] = {
              {T_CDROM, T_REMOV,
               "",         "",                 ""},
              {T_CDROM, T_FIXED,
               "",         "",                 ""},
              {T_WORM, T_REMOV,
               "",         "",                 ""},
              {T_WORM, T_FIXED,
               "",         "",                 ""},
              {T_DIRECT, T_REMOV,
               "NEC                 CD-ROM DRIVE:260", "", ""},
      #if 0
              {T_CDROM, T_REMOV, /* more luns */
               "PIONEER ", "CD-ROM DRM-600  ", ""},
      #endif
      };
      
      #define cdlookup(unit) (struct cd_softc *)disk_lookup(&cd_cd, (unit))
      
      int
      cdmatch(struct device *parent, void *match, void *aux)
      {
              struct scsi_attach_args *sa = aux;
              int priority;
      
              scsi_inqmatch(sa->sa_inqbuf, cd_patterns, nitems(cd_patterns),
                  sizeof(cd_patterns[0]), &priority);
      
              return (priority);
      }
      
      /*
 * The routine called by the low-level scsi routine when it discovers
 * a device suitable for this driver.
       */
      void
      cdattach(struct device *parent, struct device *self, void *aux)
      {
              struct cd_softc *sc = (struct cd_softc *)self;
              struct scsi_attach_args *sa = aux;
              struct scsi_link *link = sa->sa_sc_link;
      
              SC_DEBUG(link, SDEV_DB2, ("cdattach:\n"));
      
              /*
               * Store information needed to contact our base driver
               */
              sc->sc_link = link;
              link->interpret_sense = cd_interpret_sense;
              link->device_softc = sc;
              if (link->openings > CDOUTSTANDING)
                      link->openings = CDOUTSTANDING;
      
              /*
               * Initialize disk structures.
               */
              sc->sc_dk.dk_name = sc->sc_dev.dv_xname;
              bufq_init(&sc->sc_bufq, BUFQ_DEFAULT);
      
              /*
               * Note if this device is ancient.  This is used in cdminphys().
               */
              if (!(link->flags & SDEV_ATAPI) &&
                  SCSISPC(sa->sa_inqbuf->version) == 0)
                      sc->sc_flags |= CDF_ANCIENT;
      
              printf("\n");
      
              scsi_xsh_set(&sc->sc_xsh, link, cdstart);
              timeout_set(&sc->sc_timeout, (void (*)(void *))scsi_xsh_add,
                  &sc->sc_xsh);
      
              /* Attach disk. */
              sc->sc_dk.dk_flags = DKF_NOLABELREAD;
              disk_attach(&sc->sc_dev, &sc->sc_dk);
      }
      
      
      int
      cdactivate(struct device *self, int act)
      {
              struct cd_softc *sc = (struct cd_softc *)self;
      
              switch (act) {
              case DVACT_RESUME:
                      /*
                       * When resuming, hardware may have forgotten we locked it. So if
                       * there are any open partitions, lock the CD.
                       */
                      if (sc->sc_dk.dk_openmask != 0)
                              scsi_prevent(sc->sc_link, PR_PREVENT,
                                  SCSI_IGNORE_ILLEGAL_REQUEST | SCSI_IGNORE_MEDIA_CHANGE |
                                  SCSI_SILENT | SCSI_AUTOCONF);
                      break;
              case DVACT_DEACTIVATE:
                      sc->sc_flags |= CDF_DYING;
                      scsi_xsh_del(&sc->sc_xsh);
                      break;
              }
              return (0);
      }
      
      int
      cddetach(struct device *self, int flags)
      {
              struct cd_softc *sc = (struct cd_softc *)self;
      
              bufq_drain(&sc->sc_bufq);
      
              disk_gone(cdopen, self->dv_unit);
      
              /* Detach disk. */
              bufq_destroy(&sc->sc_bufq);
              disk_detach(&sc->sc_dk);
      
              return (0);
      }
      
      /*
       * Open the device. Make sure the partition info is as up-to-date as can be.
       */
      int
      cdopen(dev_t dev, int flag, int fmt, struct proc *p)
{
              struct scsi_link *link;
              struct cd_softc *sc;
              int error = 0, part, rawopen, unit;
      
              unit = DISKUNIT(dev);
              part = DISKPART(dev);
      
              rawopen = (part == RAW_PART) && (fmt == S_IFCHR);
      
              sc = cdlookup(unit);
	if (sc == NULL)
                      return (ENXIO);
              if (sc->sc_flags & CDF_DYING) {
                      device_unref(&sc->sc_dev);
                      return (ENXIO);
              }
      
              link = sc->sc_link;
              SC_DEBUG(link, SDEV_DB1,
                  ("cdopen: dev=0x%x (unit %d (of %d), partition %d)\n", dev, unit,
                  cd_cd.cd_ndevs, part));
      
              if ((error = disk_lock(&sc->sc_dk)) != 0) {
                      device_unref(&sc->sc_dev);
                      return (error);
              }
      
              if (sc->sc_dk.dk_openmask != 0) {
                      /*
                       * If any partition is open, but the disk has been invalidated,
                       * disallow further opens.
                       */
                      if ((link->flags & SDEV_MEDIA_LOADED) == 0) {
                              if (rawopen)
                                      goto out;
                              error = EIO;
                              goto bad;
                      }
              } else {
                      /*
		 * Check that it is still responding and ok.  The drive may
		 * be in the process of loading media, so use an increased
		 * retry count and don't ignore NOT_READY.
                       */
      
                      /* Use cd_interpret_sense() now. */
                      link->flags |= SDEV_OPEN;
      
                      error = scsi_test_unit_ready(link, TEST_READY_RETRIES,
                          (rawopen ? SCSI_SILENT : 0) | SCSI_IGNORE_ILLEGAL_REQUEST |
                          SCSI_IGNORE_MEDIA_CHANGE);
      
                      /* Start the cd spinning if necessary. */
                      if (error == EIO)
                              error = scsi_start(link, SSS_START,
                                  SCSI_IGNORE_ILLEGAL_REQUEST |
                                  SCSI_IGNORE_MEDIA_CHANGE | SCSI_SILENT);
      
                      if (error) {
                              if (rawopen) {
                                      error = 0;
                                      goto out;
                              } else
                                      goto bad;
                      }
      
                      /* Lock the cd in. */
                      error = scsi_prevent(link, PR_PREVENT,
                          SCSI_IGNORE_ILLEGAL_REQUEST | SCSI_IGNORE_MEDIA_CHANGE |
                          SCSI_SILENT);
                      if (error)
                              goto bad;
      
                      /* Load the physical device parameters. */
                      link->flags |= SDEV_MEDIA_LOADED;
                      if (cd_get_parms(sc, (rawopen ? SCSI_SILENT : 0) |
                          SCSI_IGNORE_ILLEGAL_REQUEST | SCSI_IGNORE_MEDIA_CHANGE)) {
                              link->flags &= ~SDEV_MEDIA_LOADED;
                              error = ENXIO;
                              goto bad;
                      }
                      SC_DEBUG(link, SDEV_DB3, ("Params loaded\n"));
      
                      /* Fabricate a disk label. */
                      cdgetdisklabel(dev, sc, sc->sc_dk.dk_label, 0);
                      SC_DEBUG(link, SDEV_DB3, ("Disklabel fabricated\n"));
              }
      
      out:
              if ((error = disk_openpart(&sc->sc_dk, part, fmt, 1)) != 0)
                      goto bad;
      
              link->flags |= SDEV_OPEN;
              SC_DEBUG(link, SDEV_DB3, ("open complete\n"));
      
              /* It's OK to fall through because dk_openmask is now non-zero. */
      bad:
              if (sc->sc_dk.dk_openmask == 0) {
                      scsi_prevent(link, PR_ALLOW,
                          SCSI_IGNORE_ILLEGAL_REQUEST | SCSI_IGNORE_MEDIA_CHANGE |
                          SCSI_SILENT);
                      link->flags &= ~(SDEV_OPEN | SDEV_MEDIA_LOADED);
              }
      
              disk_unlock(&sc->sc_dk);
              device_unref(&sc->sc_dev);
              return (error);
      }
      
      /*
       * Close the device. Only called if we are the last occurrence of an open
       * device.
       */
      int
      cdclose(dev_t dev, int flag, int fmt, struct proc *p)
      {
              struct cd_softc *sc;
              int part = DISKPART(dev);
      
              sc = cdlookup(DISKUNIT(dev));
              if (sc == NULL)
                      return ENXIO;
              if (sc->sc_flags & CDF_DYING) {
                      device_unref(&sc->sc_dev);
                      return (ENXIO);
              }
      
              disk_lock_nointr(&sc->sc_dk);
      
              disk_closepart(&sc->sc_dk, part, fmt);
      
              if (sc->sc_dk.dk_openmask == 0) {
                      /* XXXX Must wait for I/O to complete! */
      
                      scsi_prevent(sc->sc_link, PR_ALLOW,
                          SCSI_IGNORE_ILLEGAL_REQUEST | SCSI_IGNORE_NOT_READY |
                          SCSI_SILENT);
                      sc->sc_link->flags &= ~(SDEV_OPEN | SDEV_MEDIA_LOADED);
      
                      if (sc->sc_link->flags & SDEV_EJECTING) {
                              scsi_start(sc->sc_link, SSS_STOP|SSS_LOEJ, 0);
      
                              sc->sc_link->flags &= ~SDEV_EJECTING;
                      }
      
                      timeout_del(&sc->sc_timeout);
                      scsi_xsh_del(&sc->sc_xsh);
              }
      
              disk_unlock(&sc->sc_dk);
      
              device_unref(&sc->sc_dev);
              return 0;
      }
      
      /*
       * Actually translate the requested transfer into one the physical driver can
       * understand.  The transfer is described by a buf and will include only one
       * physical transfer.
       */
      void
      cdstrategy(struct buf *bp)
      {
              struct cd_softc *sc;
              int s;
      
              sc = cdlookup(DISKUNIT(bp->b_dev));
              if (sc == NULL) {
                      bp->b_error = ENXIO;
                      goto bad;
              }
              if (sc->sc_flags & CDF_DYING) {
                      bp->b_error = ENXIO;
                      goto bad;
              }
      
              SC_DEBUG(sc->sc_link, SDEV_DB2, ("cdstrategy: %ld bytes @ blk %lld\n",
                  bp->b_bcount, (long long)bp->b_blkno));
              /*
	 * If the device has been made invalid, error out:
	 * maybe the media changed, or no media is loaded.
               */
              if ((sc->sc_link->flags & SDEV_MEDIA_LOADED) == 0) {
                      bp->b_error = EIO;
                      goto bad;
              }
      
              /* Validate the request. */
              if (bounds_check_with_label(bp, sc->sc_dk.dk_label) == -1)
                      goto done;
      
              /* Place it in the queue of disk activities for this disk. */
              bufq_queue(&sc->sc_bufq, bp);
      
              /*
               * Tell the device to get going on the transfer if it's
               * not doing anything, otherwise just wait for completion
               */
              scsi_xsh_add(&sc->sc_xsh);
      
              device_unref(&sc->sc_dev);
              return;
      
      bad:
              SET(bp->b_flags, B_ERROR);
              bp->b_resid = bp->b_bcount;
      done:
              s = splbio();
              biodone(bp);
              splx(s);
              if (sc != NULL)
                      device_unref(&sc->sc_dev);
      }
      
      /*
       * cdstart looks to see if there is a buf waiting for the device
       * and that the device is not already busy. If both are true,
 * it dequeues the buf and creates a scsi command to perform the
       * transfer in the buf. The transfer request will call scsi_done
       * on completion, which will in turn call this routine again
       * so that the next queued transfer is performed.
       * The bufs are queued by the strategy routine (cdstrategy)
       *
       * This routine is also called after other non-queued requests
       * have been made of the scsi driver, to ensure that the queue
       * continues to be drained.
       *
       * must be called at the correct (highish) spl level
       * cdstart() is called at splbio from cdstrategy and scsi_done
       */
      void
      cdstart(struct scsi_xfer *xs)
      {
              struct scsi_link *link = xs->sc_link;
              struct cd_softc *sc = link->device_softc;
              struct buf *bp;
              struct scsi_rw_big *cmd_big;
              struct scsi_rw *cmd_small;
              u_int64_t secno, nsecs;
              struct partition *p;
              int read;
      
              SC_DEBUG(link, SDEV_DB2, ("cdstart\n"));
      
              if (sc->sc_flags & CDF_DYING) {
                      scsi_xs_put(xs);
                      return;
              }
      
              /*
               * If the device has become invalid, abort all the
               * reads and writes until all files have been closed and
               * re-opened
               */
              if ((link->flags & SDEV_MEDIA_LOADED) == 0) {
                      bufq_drain(&sc->sc_bufq);
                      scsi_xs_put(xs);
                      return;
              }
      
              bp = bufq_dequeue(&sc->sc_bufq);
              if (bp == NULL) {
                      scsi_xs_put(xs);
                      return;
              }
      
              /*
               * We have a buf, now we should make a command
               *
               * First, translate the block to absolute and put it in terms
               * of the logical blocksize of the device.
               */
              secno = DL_BLKTOSEC(sc->sc_dk.dk_label, bp->b_blkno);
              p = &sc->sc_dk.dk_label->d_partitions[DISKPART(bp->b_dev)];
              secno += DL_GETPOFFSET(p);
              nsecs = howmany(bp->b_bcount, sc->sc_dk.dk_label->d_secsize);
      
              read = (bp->b_flags & B_READ);
      
              /*
               *  Fill out the scsi command.  If the transfer will
               *  fit in a "small" cdb, use it.
               */
              if (!(link->flags & SDEV_ATAPI) &&
                  !(link->quirks & SDEV_ONLYBIG) &&
                  ((secno & 0x1fffff) == secno) &&
                  ((nsecs & 0xff) == nsecs)) {
                      /*
                       * We can fit in a small cdb.
                       */
                      cmd_small = (struct scsi_rw *)xs->cmd;
                      cmd_small->opcode = read ?
                          READ_COMMAND : WRITE_COMMAND;
                      _lto3b(secno, cmd_small->addr);
                      cmd_small->length = nsecs & 0xff;
                      xs->cmdlen = sizeof(*cmd_small);
              } else {
                      /*
                       * Need a large cdb.
                       */
                      cmd_big = (struct scsi_rw_big *)xs->cmd;
                      cmd_big->opcode = read ?
                          READ_BIG : WRITE_BIG;
                      _lto4b(secno, cmd_big->addr);
                      _lto2b(nsecs, cmd_big->length);
                      xs->cmdlen = sizeof(*cmd_big);
              }
      
              xs->flags |= (read ? SCSI_DATA_IN : SCSI_DATA_OUT);
              xs->timeout = 30000;
              xs->data = bp->b_data;
              xs->datalen = bp->b_bcount;
              xs->done = cd_buf_done;
              xs->cookie = bp;
              xs->bp = bp;
      
              /* Instrumentation. */
              disk_busy(&sc->sc_dk);
      
              scsi_xs_exec(xs);
      
              if (ISSET(sc->sc_flags, CDF_WAITING))
                      CLR(sc->sc_flags, CDF_WAITING);
              else if (bufq_peek(&sc->sc_bufq))
                      scsi_xsh_add(&sc->sc_xsh);
      }
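
/*
 * Example of the CDB size choice above (illustrative): with 2048-byte
 * sectors, a 6-byte READ_COMMAND can address sectors 0 through
 * 0x1fffff (the first 4GB of a disc) and transfer at most 255 sectors
 * (~510KB) per command; a request beyond either limit falls through
 * to the 10-byte READ_BIG/WRITE_BIG form, which carries a 32-bit LBA
 * and a 16-bit sector count.
 */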
      
      void
      cd_buf_done(struct scsi_xfer *xs)
      {
              struct cd_softc *sc = xs->sc_link->device_softc;
              struct buf *bp = xs->cookie;
              int error, s;
      
              switch (xs->error) {
              case XS_NOERROR:
                      bp->b_error = 0;
                      CLR(bp->b_flags, B_ERROR);
                      bp->b_resid = xs->resid;
                      break;
      
              case XS_SENSE:
              case XS_SHORTSENSE:
      #ifdef SCSIDEBUG
                      scsi_sense_print_debug(xs);
      #endif
                      error = cd_interpret_sense(xs);
                      if (error == 0) {
                              bp->b_error = 0;
                              CLR(bp->b_flags, B_ERROR);
                              bp->b_resid = xs->resid;
                              break;
                      }
                      if (error != ERESTART)
                              xs->retries = 0;
                      goto retry;
      
              case XS_BUSY:
                      if (xs->retries) {
                              if (scsi_delay(xs, 1) != ERESTART)
                                      xs->retries = 0;
                      }
                      goto retry;
      
              case XS_TIMEOUT:
      retry:
                      if (xs->retries--) {
                              scsi_xs_exec(xs);
                              return;
                      }
                      /* FALLTHROUGH */
      
              default:
                      bp->b_error = EIO;
                      SET(bp->b_flags, B_ERROR);
                      bp->b_resid = bp->b_bcount;
                      break;
              }
      
              disk_unbusy(&sc->sc_dk, bp->b_bcount - xs->resid, bp->b_blkno,
                  bp->b_flags & B_READ);
      
              s = splbio();
              biodone(bp);
              splx(s);
              scsi_xs_put(xs);
      }
      
      void
      cdminphys(struct buf *bp)
      {
              struct cd_softc *sc;
              long max;
      
              sc = cdlookup(DISKUNIT(bp->b_dev));
              if (sc == NULL)
                      return;
      
              /*
               * If the device is ancient, we want to make sure that
               * the transfer fits into a 6-byte cdb.
               *
               * XXX Note that the SCSI-I spec says that 256-block transfers
               * are allowed in a 6-byte read/write, and are specified
               * by setting the "length" to 0.  However, we're conservative
               * here, allowing only 255-block transfers in case an
               * ancient device gets confused by length == 0.  A length of 0
               * in a 10-byte read/write actually means 0 blocks.
               */
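	/*
	 * Worked example (illustrative): with the usual 2048-byte CD
	 * sector size this caps a single transfer at
	 * 255 * 2048 = 522240 bytes.
	 */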
              if (sc->sc_flags & CDF_ANCIENT) {
                      max = sc->sc_dk.dk_label->d_secsize * 0xff;
      
                      if (bp->b_bcount > max)
                              bp->b_bcount = max;
              }
      
              (*sc->sc_link->adapter->scsi_minphys)(bp, sc->sc_link);
      
              device_unref(&sc->sc_dev);
      }
      
      int
      cdread(dev_t dev, struct uio *uio, int ioflag)
      {
      
              return (physio(cdstrategy, dev, B_READ, cdminphys, uio));
      }
      
      int
      cdwrite(dev_t dev, struct uio *uio, int ioflag)
      {
      
              return (physio(cdstrategy, dev, B_WRITE, cdminphys, uio));
      }
      
      /*
       * Perform special action on behalf of the user.
       * Knows about the internals of this device
       */
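/*
 * Illustrative userland usage (hypothetical device path), e.g. to play
 * audio tracks 1 through 5:
 *
 *	int fd = open("/dev/rcd0c", O_RDONLY);
 *	struct ioc_play_track t = { 1, 1, 5, 1 };
 *
 *	if (ioctl(fd, CDIOCPLAYTRACKS, &t) == -1)
 *		err(1, "CDIOCPLAYTRACKS");
 */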
      int
      cdioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
      {
              struct cd_softc *sc;
              struct disklabel *lp;
              int part = DISKPART(dev);
              int error = 0;
      
              sc = cdlookup(DISKUNIT(dev));
              if (sc == NULL)
                      return ENXIO;
              if (sc->sc_flags & CDF_DYING) {
                      device_unref(&sc->sc_dev);
                      return (ENXIO);
              }
      
              SC_DEBUG(sc->sc_link, SDEV_DB2, ("cdioctl 0x%lx\n", cmd));
      
	/*
	 * If the device is not valid, abandon ship: without loaded
	 * media only the ioctls listed below are allowed, and only
	 * on the raw partition.
	 */
              if ((sc->sc_link->flags & SDEV_MEDIA_LOADED) == 0) {
                      switch (cmd) {
                      case DIOCLOCK:
                      case DIOCEJECT:
                      case SCIOCIDENTIFY:
                      case SCIOCCOMMAND:
                      case SCIOCDEBUG:
                      case CDIOCLOADUNLOAD:
                      case SCIOCRESET:
                      case CDIOCGETVOL:
                      case CDIOCSETVOL:
                      case CDIOCSETMONO:
                      case CDIOCSETSTEREO:
                      case CDIOCSETMUTE:
                      case CDIOCSETLEFT:
                      case CDIOCSETRIGHT:
                      case CDIOCCLOSE:
                      case CDIOCEJECT:
                      case CDIOCALLOW:
                      case CDIOCPREVENT:
                      case CDIOCSETDEBUG:
                      case CDIOCCLRDEBUG:
                      case CDIOCRESET:
                      case DVD_AUTH:
                      case DVD_READ_STRUCT:
                      case MTIOCTOP:
                              if (part == RAW_PART)
                                      break;
                      /* FALLTHROUGH */
                      default:
                              if ((sc->sc_link->flags & SDEV_OPEN) == 0)
                                      error = ENODEV;
                              else
                                      error = EIO;
                              goto exit;
                      }
              }
      
              switch (cmd) {
              case DIOCRLDINFO:
                      lp = malloc(sizeof(*lp), M_TEMP, M_WAITOK);
                      cdgetdisklabel(dev, sc, lp, 0);
                      memcpy(sc->sc_dk.dk_label, lp, sizeof(*lp));
                      free(lp, M_TEMP, sizeof(*lp));
                      break;
      
              case DIOCGPDINFO:
                      cdgetdisklabel(dev, sc, (struct disklabel *)addr, 1);
                      break;
      
              case DIOCGDINFO:
                      *(struct disklabel *)addr = *(sc->sc_dk.dk_label);
                      break;
      
              case DIOCGPART:
                      ((struct partinfo *)addr)->disklab = sc->sc_dk.dk_label;
                      ((struct partinfo *)addr)->part =
                          &sc->sc_dk.dk_label->d_partitions[DISKPART(dev)];
                      break;
      
              case DIOCWDINFO:
              case DIOCSDINFO:
                      if ((flag & FWRITE) == 0) {
                              error = EBADF;
                              break;
                      }
      
                      if ((error = disk_lock(&sc->sc_dk)) != 0)
                              break;
      
                      error = setdisklabel(sc->sc_dk.dk_label,
                          (struct disklabel *)addr, sc->sc_dk.dk_openmask);
		/*
		 * On success there is nothing further to do: the label
		 * lives only in memory, as CD media is read-only.
		 */
      
                      disk_unlock(&sc->sc_dk);
                      break;
      
              case CDIOCPLAYTRACKS: {
                      struct ioc_play_track *args = (struct ioc_play_track *)addr;
      
                      if ((error = cd_set_pa_immed(sc, 0)) != 0)
                              break;
                      error = cd_play_tracks(sc, args->start_track,
                          args->start_index, args->end_track, args->end_index);
                      break;
              }
              case CDIOCPLAYMSF: {
                      struct ioc_play_msf *args = (struct ioc_play_msf *)addr;
      
                      if ((error = cd_set_pa_immed(sc, 0)) != 0)
                              break;
                      error = cd_play_msf(sc, args->start_m, args->start_s,
                          args->start_f, args->end_m, args->end_s, args->end_f);
                      break;
              }
              case CDIOCPLAYBLOCKS: {
                      struct ioc_play_blocks *args = (struct ioc_play_blocks *)addr;
      
                      if ((error = cd_set_pa_immed(sc, 0)) != 0)
                              break;
                      error = cd_play(sc, args->blk, args->len);
                      break;
              }
              case CDIOCREADSUBCHANNEL: {
                      struct ioc_read_subchannel *args =
                          (struct ioc_read_subchannel *)addr;
                      struct cd_sub_channel_info *data;
                      int len = args->data_len;
      
                      if (len > sizeof(*data) ||
                          len < sizeof(struct cd_sub_channel_header)) {
                              error = EINVAL;
                              break;
                      }
                      data = dma_alloc(sizeof(*data), PR_WAITOK);
                      error = cd_read_subchannel(sc, args->address_format,
                          args->data_format, args->track, data, len);
                      if (error) {
                              dma_free(data, sizeof(*data));
                              break;
                      }
                      len = min(len, _2btol(data->header.data_len) +
                          sizeof(struct cd_sub_channel_header));
                      error = copyout(data, args->data, len);
                      dma_free(data, sizeof(*data));
                      break;
              }
              case CDIOREADTOCHEADER: {
                      struct ioc_toc_header *th;
      
                      th = dma_alloc(sizeof(*th), PR_WAITOK);
                      if ((error = cd_read_toc(sc, 0, 0, th, sizeof(*th), 0)) != 0) {
                              dma_free(th, sizeof(*th));
                              break;
                      }
                      if (sc->sc_link->quirks & ADEV_LITTLETOC)
                              th->len = letoh16(th->len);
                      else
                              th->len = betoh16(th->len);
                      if (th->len > 0)
                              memcpy(addr, th, sizeof(*th));
                      else
                              error = EIO;
                      dma_free(th, sizeof(*th));
                      break;
              }
              case CDIOREADTOCENTRYS: {
                      struct cd_toc *toc;
                      struct ioc_read_toc_entry *te =
                          (struct ioc_read_toc_entry *)addr;
                      struct ioc_toc_header *th;
                      struct cd_toc_entry *cte;
                      int len = te->data_len;
                      int ntracks;
      
                      toc = dma_alloc(sizeof(*toc), PR_WAITOK | PR_ZERO);
      
                      th = &toc->header;
      
                      if (len > sizeof(toc->entries) ||
                          len < sizeof(struct cd_toc_entry)) {
                              dma_free(toc, sizeof(*toc));
                              error = EINVAL;
                              break;
                      }
                      error = cd_read_toc(sc, te->address_format, te->starting_track,
                          toc, len + sizeof(struct ioc_toc_header), 0);
                      if (error) {
                              dma_free(toc, sizeof(*toc));
                              break;
                      }
                      if (te->address_format == CD_LBA_FORMAT)
                              for (ntracks =
                                  th->ending_track - th->starting_track + 1;
                                  ntracks >= 0; ntracks--) {
                                      cte = &toc->entries[ntracks];
                                      cte->addr_type = CD_LBA_FORMAT;
                                      if (sc->sc_link->quirks & ADEV_LITTLETOC) {
      #if BYTE_ORDER == BIG_ENDIAN
                                              swap16_multi((u_int16_t *)&cte->addr,
                                                  sizeof(cte->addr) / 2);
      #endif
                                      } else
                                              cte->addr.lba = betoh32(cte->addr.lba);
                              }
                      if (sc->sc_link->quirks & ADEV_LITTLETOC) {
                              th->len = letoh16(th->len);
                      } else
                              th->len = betoh16(th->len);
                      len = min(len, th->len - (sizeof(th->starting_track) +
                          sizeof(th->ending_track)));
      
                      error = copyout(toc->entries, te->data, len);
                      dma_free(toc, sizeof(*toc));
                      break;
              }
              case CDIOREADMSADDR: {
                      struct cd_toc *toc;
                      int sessno = *(int *)addr;
                      struct cd_toc_entry *cte;
      
                      if (sessno != 0) {
                              error = EINVAL;
                              break;
                      }
      
                      toc = dma_alloc(sizeof(*toc), PR_WAITOK | PR_ZERO);
      
                      error = cd_read_toc(sc, 0, 0, toc,
                          sizeof(struct ioc_toc_header) + sizeof(struct cd_toc_entry),
                          0x40 /* control word for "get MS info" */);
      
                      if (error) {
                              dma_free(toc, sizeof(*toc));
                              break;
                      }
      
                      cte = &toc->entries[0];
                      if (sc->sc_link->quirks & ADEV_LITTLETOC) {
      #if BYTE_ORDER == BIG_ENDIAN
                              swap16_multi((u_int16_t *)&cte->addr,
                                  sizeof(cte->addr) / 2);
      #endif
                      } else
                              cte->addr.lba = betoh32(cte->addr.lba);
                      if (sc->sc_link->quirks & ADEV_LITTLETOC)
                              toc->header.len = letoh16(toc->header.len);
                      else
                              toc->header.len = betoh16(toc->header.len);
      
                      *(int *)addr = (toc->header.len >= 10 && cte->track > 1) ?
                              cte->addr.lba : 0;
                      dma_free(toc, sizeof(*toc));
                      break;
              }
              case CDIOCSETPATCH: {
                      struct ioc_patch *arg = (struct ioc_patch *)addr;
      
                      error = cd_setchan(sc, arg->patch[0], arg->patch[1],
                          arg->patch[2], arg->patch[3], 0);
                      break;
              }
              case CDIOCGETVOL: {
                      struct ioc_vol *arg = (struct ioc_vol *)addr;
      
                      error = cd_getvol(sc, arg, 0);
                      break;
              }
              case CDIOCSETVOL: {
                      struct ioc_vol *arg = (struct ioc_vol *)addr;
      
                      error = cd_setvol(sc, arg, 0);
                      break;
              }
      
              case CDIOCSETMONO:
                      error = cd_setchan(sc, BOTH_CHANNEL, BOTH_CHANNEL, MUTE_CHANNEL,
                          MUTE_CHANNEL, 0);
                      break;
      
              case CDIOCSETSTEREO:
                      error = cd_setchan(sc, LEFT_CHANNEL, RIGHT_CHANNEL,
                          MUTE_CHANNEL, MUTE_CHANNEL, 0);
                      break;
      
              case CDIOCSETMUTE:
                      error = cd_setchan(sc, MUTE_CHANNEL, MUTE_CHANNEL, MUTE_CHANNEL,
                          MUTE_CHANNEL, 0);
                      break;
      
              case CDIOCSETLEFT:
                      error = cd_setchan(sc, LEFT_CHANNEL, LEFT_CHANNEL, MUTE_CHANNEL,
                          MUTE_CHANNEL, 0);
                      break;
      
              case CDIOCSETRIGHT:
                      error = cd_setchan(sc, RIGHT_CHANNEL, RIGHT_CHANNEL,
                          MUTE_CHANNEL, MUTE_CHANNEL, 0);
                      break;
      
              case CDIOCRESUME:
                      error = cd_pause(sc, 1);
                      break;
      
              case CDIOCPAUSE:
                      error = cd_pause(sc, 0);
                      break;
              case CDIOCSTART:
                      error = scsi_start(sc->sc_link, SSS_START, 0);
                      break;
      
              case CDIOCSTOP:
                      error = scsi_start(sc->sc_link, SSS_STOP, 0);
                      break;
      
      close_tray:
              case CDIOCCLOSE:
                      error = scsi_start(sc->sc_link, SSS_START|SSS_LOEJ,
                          SCSI_IGNORE_NOT_READY | SCSI_IGNORE_MEDIA_CHANGE);
                      break;
      
              case MTIOCTOP:
                      if (((struct mtop *)addr)->mt_op == MTRETEN)
                              goto close_tray;
                      if (((struct mtop *)addr)->mt_op != MTOFFL) {
                              error = EIO;
                              break;
                      }
                      /* FALLTHROUGH */
              case CDIOCEJECT: /* FALLTHROUGH */
              case DIOCEJECT:
                      sc->sc_link->flags |= SDEV_EJECTING;
                      break;
              case CDIOCALLOW:
                      error = scsi_prevent(sc->sc_link, PR_ALLOW, 0);
                      break;
              case CDIOCPREVENT:
                      error = scsi_prevent(sc->sc_link, PR_PREVENT, 0);
                      break;
              case DIOCLOCK:
                      error = scsi_prevent(sc->sc_link,
                          (*(int *)addr) ? PR_PREVENT : PR_ALLOW, 0);
                      break;
              case CDIOCSETDEBUG:
                      sc->sc_link->flags |= (SDEV_DB1 | SDEV_DB2);
                      break;
              case CDIOCCLRDEBUG:
                      sc->sc_link->flags &= ~(SDEV_DB1 | SDEV_DB2);
                      break;
              case CDIOCRESET:
              case SCIOCRESET:
                      error = cd_reset(sc);
                      break;
              case CDIOCLOADUNLOAD: {
                      struct ioc_load_unload *args = (struct ioc_load_unload *)addr;
      
                      error = cd_load_unload(sc, args->options, args->slot);
                      break;
              }
      
              case DVD_AUTH:
                      error = dvd_auth(sc, (union dvd_authinfo *)addr);
                      break;
              case DVD_READ_STRUCT:
                      error = dvd_read_struct(sc, (union dvd_struct *)addr);
                      break;
              default:
                      if (DISKPART(dev) != RAW_PART) {
                              error = ENOTTY;
                              break;
                      }
                      error = scsi_do_ioctl(sc->sc_link, cmd, addr, flag);
                      break;
              }
      
      exit:
      
              device_unref(&sc->sc_dev);
              return (error);
      }
      
      /*
       * Load the label information on the named device
       * Actually fabricate a disklabel
       *
       * EVENTUALLY take information about different
       * data tracks from the TOC and put it in the disklabel
       */
      int
      cdgetdisklabel(dev_t dev, struct cd_softc *sc, struct disklabel *lp,
          int spoofonly)
      {
              struct cd_toc *toc;
              int tocidx, n, audioonly = 1;
      
              bzero(lp, sizeof(struct disklabel));
      
              lp->d_secsize = sc->params.secsize;
              lp->d_ntracks = 1;
              lp->d_nsectors = 100;
              lp->d_secpercyl = 100;
              lp->d_ncylinders = (sc->params.disksize / 100) + 1;
      
              if (sc->sc_link->flags & SDEV_ATAPI) {
                      strncpy(lp->d_typename, "ATAPI CD-ROM", sizeof(lp->d_typename));
                      lp->d_type = DTYPE_ATAPI;
              } else {
                      strncpy(lp->d_typename, "SCSI CD-ROM", sizeof(lp->d_typename));
                      lp->d_type = DTYPE_SCSI;
              }
      
              strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
              DL_SETDSIZE(lp, sc->params.disksize);
              lp->d_version = 1;
      
              /* XXX - these values for BBSIZE and SBSIZE assume ffs */
              lp->d_bbsize = BBSIZE;
              lp->d_sbsize = SBSIZE;
      
              lp->d_magic = DISKMAGIC;
              lp->d_magic2 = DISKMAGIC;
              lp->d_checksum = dkcksum(lp);
      
              toc = dma_alloc(sizeof(*toc), PR_WAITOK | PR_ZERO);
              if (cd_load_toc(sc, toc, CD_LBA_FORMAT)) {
                      audioonly = 0; /* No valid TOC found == not an audio CD. */
                      goto done;
              }
      
              n = toc->header.ending_track - toc->header.starting_track + 1;
              for (tocidx = 0; tocidx < n; tocidx++)
                      if (toc->entries[tocidx].control & 4) {
                              audioonly = 0; /* Found a non-audio track. */
                              goto done;
                      }
      
      done:
              dma_free(toc, sizeof(*toc));
      
              if (audioonly)
                      return (0);
              return readdisklabel(DISKLABELDEV(dev), cdstrategy, lp, spoofonly);
      }
      
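/*
 * Set the channel routing for the four audio output ports by rewriting
 * the AUDIO_PAGE mode page with MODE SELECT.
 */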
      int
      cd_setchan(struct cd_softc *sc, int p0, int p1, int p2, int p3, int flags)
      {
              union scsi_mode_sense_buf *data;
              struct cd_audio_page *audio = NULL;
              int error, big;
      
              data = dma_alloc(sizeof(*data), PR_NOWAIT);
              if (data == NULL)
                      return (ENOMEM);
      
              error = scsi_do_mode_sense(sc->sc_link, AUDIO_PAGE, data,
                  (void **)&audio, NULL, NULL, NULL, sizeof(*audio), flags, &big);
              if (error == 0 && audio == NULL)
                      error = EIO;
      
              if (error == 0) {
                      audio->port[LEFT_PORT].channels = p0;
                      audio->port[RIGHT_PORT].channels = p1;
                      audio->port[2].channels = p2;
                      audio->port[3].channels = p3;
                      if (big)
                              error = scsi_mode_select_big(sc->sc_link, SMS_PF,
                                  &data->hdr_big, flags, 20000);
                      else
                              error = scsi_mode_select(sc->sc_link, SMS_PF,
                                  &data->hdr, flags, 20000);
              }
      
              dma_free(data, sizeof(*data));
              return (error);
      }
      
      int
      cd_getvol(struct cd_softc *sc, struct ioc_vol *arg, int flags)
      {
              union scsi_mode_sense_buf *data;
              struct cd_audio_page *audio = NULL;
              int error;
      
              data = dma_alloc(sizeof(*data), PR_NOWAIT);
              if (data == NULL)
                      return (ENOMEM);
      
              error = scsi_do_mode_sense(sc->sc_link, AUDIO_PAGE, data,
                  (void **)&audio, NULL, NULL, NULL, sizeof(*audio), flags, NULL);
              if (error == 0 && audio == NULL)
                      error = EIO;
      
              if (error == 0) {
                      arg->vol[0] = audio->port[0].volume;
                      arg->vol[1] = audio->port[1].volume;
                      arg->vol[2] = audio->port[2].volume;
                      arg->vol[3] = audio->port[3].volume;
              }
      
              dma_free(data, sizeof(*data));
	return (error);
      }
      
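/*
 * Set the playback volume: fetch the changeable-values mask for the
 * AUDIO_PAGE, apply it to the requested levels, then write the page
 * back with MODE SELECT.
 */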
      int
      cd_setvol(struct cd_softc *sc, const struct ioc_vol *arg, int flags)
      {
              union scsi_mode_sense_buf *data;
              struct cd_audio_page *audio = NULL;
              u_int8_t mask_volume[4];
              int error, big;
      
              data = dma_alloc(sizeof(*data), PR_NOWAIT);
              if (data == NULL)
                      return (ENOMEM);
      
              error = scsi_do_mode_sense(sc->sc_link,
                  AUDIO_PAGE | SMS_PAGE_CTRL_CHANGEABLE, data, (void **)&audio, NULL,
                  NULL, NULL, sizeof(*audio), flags, NULL);
              if (error == 0 && audio == NULL)
                      error = EIO;
              if (error != 0) {
                      dma_free(data, sizeof(*data));
                      return (error);
              }
      
              mask_volume[0] = audio->port[0].volume;
              mask_volume[1] = audio->port[1].volume;
              mask_volume[2] = audio->port[2].volume;
              mask_volume[3] = audio->port[3].volume;
      
              error = scsi_do_mode_sense(sc->sc_link, AUDIO_PAGE, data,
                  (void **)&audio, NULL, NULL, NULL, sizeof(*audio), flags, &big);
              if (error == 0 && audio == NULL)
                      error = EIO;
              if (error != 0) {
                      dma_free(data, sizeof(*data));
                      return (error);
              }
      
              audio->port[0].volume = arg->vol[0] & mask_volume[0];
              audio->port[1].volume = arg->vol[1] & mask_volume[1];
              audio->port[2].volume = arg->vol[2] & mask_volume[2];
              audio->port[3].volume = arg->vol[3] & mask_volume[3];
      
              if (big)
                      error = scsi_mode_select_big(sc->sc_link, SMS_PF,
                          &data->hdr_big, flags, 20000);
              else
                      error = scsi_mode_select(sc->sc_link, SMS_PF,
                          &data->hdr, flags, 20000);
      
              dma_free(data, sizeof(*data));
              return (error);
      }
      
      int
      cd_load_unload(struct cd_softc *sc, int options, int slot)
      {
              struct scsi_load_unload *cmd;
              struct scsi_xfer *xs;
              int error;
      
              xs = scsi_xs_get(sc->sc_link, 0);
              if (xs == NULL)
                      return (ENOMEM);
              xs->cmdlen = sizeof(*cmd);
              xs->timeout = 200000;
      
              cmd = (struct scsi_load_unload *)xs->cmd;
              cmd->opcode = LOAD_UNLOAD;
              cmd->options = options;    /* ioctl uses ATAPI values */
              cmd->slot = slot;
      
              error = scsi_xs_sync(xs);
              scsi_xs_put(xs);
      
              return (error);
      }
      
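/*
 * Clear SOTC (stop on track crossing) and set IMMED in the audio
 * control page, so that PLAY commands return as soon as the operation
 * starts and playback continues across track boundaries.
 */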
      int
      cd_set_pa_immed(struct cd_softc *sc, int flags)
      {
              union scsi_mode_sense_buf *data;
              struct cd_audio_page *audio = NULL;
              int error, oflags, big;
      
              if (sc->sc_link->flags & SDEV_ATAPI)
                      /* XXX Noop? */
                      return (0);
      
              data = dma_alloc(sizeof(*data), PR_NOWAIT);
              if (data == NULL)
                      return (ENOMEM);
      
              error = scsi_do_mode_sense(sc->sc_link, AUDIO_PAGE, data,
                  (void **)&audio, NULL, NULL, NULL, sizeof(*audio), flags, &big);
              if (error == 0 && audio == NULL)
                      error = EIO;
      
              if (error == 0) {
                      oflags = audio->flags;
                      audio->flags &= ~CD_PA_SOTC;
                      audio->flags |= CD_PA_IMMED;
                      if (audio->flags != oflags) {
                              if (big)
                                      error = scsi_mode_select_big(sc->sc_link,
                                          SMS_PF, &data->hdr_big, flags, 20000);
                              else
                                      error = scsi_mode_select(sc->sc_link, SMS_PF,
                                          &data->hdr, flags, 20000);
                      }
              }
      
              dma_free(data, sizeof(*data));
              return (error);
      }
      
      /*
       * Get scsi driver to send a "start playing" command
       */
      int
      cd_play(struct cd_softc *sc, int secno, int nsecs)
      {
              struct scsi_play *cmd;
              struct scsi_xfer *xs;
              int error;
      
              xs = scsi_xs_get(sc->sc_link, 0);
              if (xs == NULL)
                      return (ENOMEM);
              xs->cmdlen = sizeof(*cmd);
              xs->timeout = 200000;
      
              cmd = (struct scsi_play *)xs->cmd;
              cmd->opcode = PLAY;
              _lto4b(secno, cmd->blk_addr);
              _lto2b(nsecs, cmd->xfer_len);
      
              error = scsi_xs_sync(xs);
              scsi_xs_put(xs);
      
              return (error);
      }
      
/*
 * Play a range of tracks: translate the track/index range into MSF
 * addresses via the TOC and issue a "play msf" command.
 */
      int
      cd_play_tracks(struct cd_softc *sc, int strack, int sindex, int etrack,
          int eindex)
      {
              struct cd_toc *toc;
              u_char endf, ends, endm;
              int error;
      
              if (!etrack)
                      return (EIO);
              if (strack > etrack)
                      return (EINVAL);
      
              toc = dma_alloc(sizeof(*toc), PR_WAITOK | PR_ZERO);
      
              if ((error = cd_load_toc(sc, toc, CD_MSF_FORMAT)) != 0)
                      goto done;
      
              if (++etrack > (toc->header.ending_track+1))
                      etrack = toc->header.ending_track+1;
      
              strack -= toc->header.starting_track;
              etrack -= toc->header.starting_track;
              if (strack < 0) {
                      error = EINVAL;
                      goto done;
              }
      
              /*
               * The track ends one frame before the next begins.  The last track
               * is taken care of by the leadoff track.
               */
              endm = toc->entries[etrack].addr.msf.minute;
              ends = toc->entries[etrack].addr.msf.second;
              endf = toc->entries[etrack].addr.msf.frame;
              if (endf-- == 0) {
                      endf = CD_FRAMES - 1;
                      if (ends-- == 0) {
                              ends = CD_SECS - 1;
                              if (endm-- == 0) {
                                      error = EINVAL;
                                      goto done;
                              }
                      }
              }
      
              error = cd_play_msf(sc, toc->entries[strack].addr.msf.minute,
                  toc->entries[strack].addr.msf.second,
                  toc->entries[strack].addr.msf.frame,
                  endm, ends, endf);
      
      done:
              dma_free(toc, sizeof(*toc));
              return (error);
      }
      
      /*
       * Get scsi driver to send a "play msf" command
       */
      int
      cd_play_msf(struct cd_softc *sc, int startm, int starts, int startf, int endm,
          int ends, int endf)
      {
              struct scsi_play_msf *cmd;
              struct scsi_xfer *xs;
              int error;
      
              xs = scsi_xs_get(sc->sc_link, 0);
              if (xs == NULL)
                      return (ENOMEM);
              xs->cmdlen = sizeof(*cmd);
              xs->timeout = 20000;
      
              cmd = (struct scsi_play_msf *)xs->cmd;
              cmd->opcode = PLAY_MSF;
              cmd->start_m = startm;
              cmd->start_s = starts;
              cmd->start_f = startf;
              cmd->end_m = endm;
              cmd->end_s = ends;
              cmd->end_f = endf;
      
              error = scsi_xs_sync(xs);
              scsi_xs_put(xs);
      
              return (error);
      }
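
/*
 * For reference (illustrative, not used directly here): an MSF address
 * corresponds to the logical block address
 *
 *	lba = ((minute * CD_SECS + second) * CD_FRAMES + frame) - 150
 *
 * with CD_SECS == 60 and CD_FRAMES == 75; the 150-frame offset is the
 * standard 2-second pregap.
 */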
      
/*
 * Get scsi driver to send a "pause" or "resume" command
 */
      int
      cd_pause(struct cd_softc *sc, int go)
      {
              struct scsi_pause *cmd;
              struct scsi_xfer *xs;
              int error;
      
              xs = scsi_xs_get(sc->sc_link, 0);
              if (xs == NULL)
                      return (ENOMEM);
              xs->cmdlen = sizeof(*cmd);
              xs->timeout = 2000;
      
              cmd = (struct scsi_pause *)xs->cmd;
              cmd->opcode = PAUSE;
              cmd->resume = go;
      
              error = scsi_xs_sync(xs);
              scsi_xs_put(xs);
      
              return (error);
      }
      
      /*
       * Get scsi driver to send a "RESET" command
       */
      int
      cd_reset(struct cd_softc *sc)
      {
              struct scsi_xfer *xs;
              int error;
      
              xs = scsi_xs_get(sc->sc_link, SCSI_RESET);
              if (xs == NULL)
                      return (ENOMEM);
      
              xs->timeout = 2000;
      
              error = scsi_xs_sync(xs);
              scsi_xs_put(xs);
      
              return (error);
      }
      
      /*
       * Read subchannel
       */
      int
      cd_read_subchannel(struct cd_softc *sc, int mode, int format, int track,
          struct cd_sub_channel_info *data, int len)
      {
              struct scsi_read_subchannel *cmd;
              struct scsi_xfer *xs;
              int error;
      
              xs = scsi_xs_get(sc->sc_link, SCSI_DATA_IN | SCSI_SILENT);
              if (xs == NULL)
                      return (ENOMEM);
              xs->cmdlen = sizeof(*cmd);
              xs->data = (void *)data;
              xs->datalen = len;
              xs->timeout = 5000;
      
              cmd = (struct scsi_read_subchannel *)xs->cmd;
              cmd->opcode = READ_SUBCHANNEL;
              if (mode == CD_MSF_FORMAT)
                      cmd->byte2 |= CD_MSF;
              cmd->byte3 = SRS_SUBQ;
              cmd->subchan_format = format;
              cmd->track = track;
              _lto2b(len, cmd->data_len);
      
              error = scsi_xs_sync(xs);
              scsi_xs_put(xs);
      
              return (error);
      }
      
      /*
       * Read table of contents
       */
      int
      cd_read_toc(struct cd_softc *sc, int mode, int start, void *data, int len,
          int control)
      {
              struct scsi_read_toc *cmd;
              struct scsi_xfer *xs;
              int error;
      
              xs = scsi_xs_get(sc->sc_link, SCSI_DATA_IN |
                  SCSI_IGNORE_ILLEGAL_REQUEST);
              if (xs == NULL)
                      return (ENOMEM);
              xs->cmdlen = sizeof(*cmd);
              xs->data = data;
              xs->datalen = len;
              xs->timeout = 5000;
      
              bzero(data, len);
      
              cmd = (struct scsi_read_toc *)xs->cmd;
              cmd->opcode = READ_TOC;
      
              if (mode == CD_MSF_FORMAT)
                      cmd->byte2 |= CD_MSF;
              cmd->from_track = start;
              _lto2b(len, cmd->data_len);
              cmd->control = control;
      
              error = scsi_xs_sync(xs);
              scsi_xs_put(xs);
      
              return (error);
      }
      
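/*
 * Read just the TOC header to learn the track range, then reread the
 * TOC with room for one entry per track plus the leadout.
 */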
      int
      cd_load_toc(struct cd_softc *sc, struct cd_toc *toc, int fmt)
      {
              int n, len, error;
      
              error = cd_read_toc(sc, 0, 0, toc, sizeof(toc->header), 0);
      
              if (error == 0) {
                      if (toc->header.ending_track < toc->header.starting_track)
                              return (EIO);
		/* +2: one entry per track plus one for the leadout track. */
                      n = toc->header.ending_track - toc->header.starting_track + 2;
                      len = n * sizeof(struct cd_toc_entry) + sizeof(toc->header);
                      error = cd_read_toc(sc, fmt, 0, toc, len, 0);
              }
      
              return (error);
      }
      
      
/*
 * Ask the device for its capacity and sector size with READ CAPACITY
 * and use the results to fill out the disk parameter structure,
 * falling back to sane defaults for drives that cannot report them.
 */
      int
      cd_get_parms(struct cd_softc *sc, int flags)
      {
              /* Reasonable defaults for drives that don't support READ_CAPACITY */
              sc->params.secsize = 2048;
              sc->params.disksize = 400000;
      
              if (sc->sc_link->quirks & ADEV_NOCAPACITY)
                      return (0);
      
              sc->params.disksize = cd_size(sc->sc_link, flags, &sc->params.secsize);
      
              if ((sc->params.secsize < 512) ||
                  ((sc->params.secsize & 511) != 0))
                      sc->params.secsize = 2048;        /* some drives lie ! */
      
              if (sc->params.disksize < 100)
                      sc->params.disksize = 400000;
      
              return (0);
      }
      
      daddr_t
      cdsize(dev_t dev)
      {
      
              /* CD-ROMs are read-only. */
              return -1;
      }
      
      int
      cddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size)
      {
              /* Not implemented. */
              return ENXIO;
      }
      
      #define        dvd_copy_key(dst, src)                memcpy((dst), (src), DVD_KEY_SIZE)
      #define        dvd_copy_challenge(dst, src)        memcpy((dst), (src), DVD_CHALLENGE_SIZE)
      
      #define DVD_AUTH_BUFSIZE                20
      
      int
      dvd_auth(struct cd_softc *sc, union dvd_authinfo *a)
      {
              struct scsi_generic *cmd;
              struct scsi_xfer *xs;
              u_int8_t *buf;
              int error;
      
              buf = dma_alloc(DVD_AUTH_BUFSIZE, PR_WAITOK | PR_ZERO);
              if (buf == NULL)
                      return (ENOMEM);
      
              xs = scsi_xs_get(sc->sc_link, 0);
              if (xs == NULL) {
                      error = ENOMEM;
                      goto done;
              }
              xs->cmdlen = sizeof(*cmd);
              xs->timeout = 30000;
              xs->data = buf;
      
              cmd = xs->cmd;
      
              switch (a->type) {
              case DVD_LU_SEND_AGID:
                      cmd->opcode = GPCMD_REPORT_KEY;
                      cmd->bytes[8] = 8;
                      cmd->bytes[9] = 0 | (0 << 6);
                      xs->datalen = 8;
                      xs->flags |= SCSI_DATA_IN;
      
                      error = scsi_xs_sync(xs);
                      scsi_xs_put(xs);
      
                      if (error == 0)
                              a->lsa.agid = buf[7] >> 6;
                      break;
      
              case DVD_LU_SEND_CHALLENGE:
                      cmd->opcode = GPCMD_REPORT_KEY;
                      cmd->bytes[8] = 16;
                      cmd->bytes[9] = 1 | (a->lsc.agid << 6);
                      xs->datalen = 16;
                      xs->flags |= SCSI_DATA_IN;
      
                      error = scsi_xs_sync(xs);
                      scsi_xs_put(xs);
                      if (error == 0)
                              dvd_copy_challenge(a->lsc.chal, &buf[4]);
                      break;
      
              case DVD_LU_SEND_KEY1:
                      cmd->opcode = GPCMD_REPORT_KEY;
                      cmd->bytes[8] = 12;
                      cmd->bytes[9] = 2 | (a->lsk.agid << 6);
                      xs->datalen = 12;
                      xs->flags |= SCSI_DATA_IN;
      
                      error = scsi_xs_sync(xs);
                      scsi_xs_put(xs);
      
                      if (error == 0)
                              dvd_copy_key(a->lsk.key, &buf[4]);
                      break;
      
              case DVD_LU_SEND_TITLE_KEY:
                      cmd->opcode = GPCMD_REPORT_KEY;
                      _lto4b(a->lstk.lba, &cmd->bytes[1]);
                      cmd->bytes[8] = 12;
                      cmd->bytes[9] = 4 | (a->lstk.agid << 6);
                      xs->datalen = 12;
                      xs->flags |= SCSI_DATA_IN;
      
                      error = scsi_xs_sync(xs);
                      scsi_xs_put(xs);
      
                      if (error == 0) {
                              a->lstk.cpm = (buf[4] >> 7) & 1;
                              a->lstk.cp_sec = (buf[4] >> 6) & 1;
                              a->lstk.cgms = (buf[4] >> 4) & 3;
                              dvd_copy_key(a->lstk.title_key, &buf[5]);
                      }
                      break;
      
              case DVD_LU_SEND_ASF:
                      cmd->opcode = GPCMD_REPORT_KEY;
                      cmd->bytes[8] = 8;
                      cmd->bytes[9] = 5 | (a->lsasf.agid << 6);
                      xs->datalen = 8;
                      xs->flags |= SCSI_DATA_IN;
      
                      error = scsi_xs_sync(xs);
                      scsi_xs_put(xs);
      
                      if (error == 0)
                              a->lsasf.asf = buf[7] & 1;
                      break;
      
              case DVD_HOST_SEND_CHALLENGE:
                      cmd->opcode = GPCMD_SEND_KEY;
                      cmd->bytes[8] = 16;
                      cmd->bytes[9] = 1 | (a->hsc.agid << 6);
                      buf[1] = 14;
                      dvd_copy_challenge(&buf[4], a->hsc.chal);
                      xs->datalen = 16;
                      xs->flags |= SCSI_DATA_OUT;
      
                      error = scsi_xs_sync(xs);
                      scsi_xs_put(xs);
      
                      if (error == 0)
                              a->type = DVD_LU_SEND_KEY1;
                      break;
      
              case DVD_HOST_SEND_KEY2:
                      cmd->opcode = GPCMD_SEND_KEY;
                      cmd->bytes[8] = 12;
                      cmd->bytes[9] = 3 | (a->hsk.agid << 6);
                      buf[1] = 10;
                      dvd_copy_key(&buf[4], a->hsk.key);
                      xs->datalen = 12;
                      xs->flags |= SCSI_DATA_OUT;
      
                      error = scsi_xs_sync(xs);
                      scsi_xs_put(xs);
      
                      if (error == 0)
                              a->type = DVD_AUTH_ESTABLISHED;
                      else
                              a->type = DVD_AUTH_FAILURE;
                      break;
      
              case DVD_INVALIDATE_AGID:
                      cmd->opcode = GPCMD_REPORT_KEY;
                      cmd->bytes[9] = 0x3f | (a->lsa.agid << 6);
                      xs->data = NULL;
      
                      error = scsi_xs_sync(xs);
                      scsi_xs_put(xs);
                      break;
      
              case DVD_LU_SEND_RPC_STATE:
                      cmd->opcode = GPCMD_REPORT_KEY;
                      cmd->bytes[8] = 8;
                      cmd->bytes[9] = 8 | (0 << 6);
                      xs->datalen = 8;
                      xs->flags |= SCSI_DATA_IN;
      
                      error = scsi_xs_sync(xs);
                      scsi_xs_put(xs);
      
                      if (error == 0) {
                              a->lrpcs.type = (buf[4] >> 6) & 3;
                              a->lrpcs.vra = (buf[4] >> 3) & 7;
                              a->lrpcs.ucca = (buf[4]) & 7;
                              a->lrpcs.region_mask = buf[5];
                              a->lrpcs.rpc_scheme = buf[6];
                      }
                      break;
      
              case DVD_HOST_SEND_RPC_STATE:
                      cmd->opcode = GPCMD_SEND_KEY;
                      cmd->bytes[8] = 8;
                      cmd->bytes[9] = 6 | (0 << 6);
                      buf[1] = 6;
                      buf[4] = a->hrpcs.pdrc;
                      xs->datalen = 8;
                      xs->flags |= SCSI_DATA_OUT;
      
                      error = scsi_xs_sync(xs);
                      scsi_xs_put(xs);
                      break;
      
              default:
                      scsi_xs_put(xs);
                      error = ENOTTY;
                      break;
              }
      done:
              dma_free(buf, DVD_AUTH_BUFSIZE);
              return (error);
      }
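
/*
 * Illustrative only: userland drives the CSS authentication handshake
 * as a sequence of DVD_AUTH ioctls against the cases above, roughly:
 *
 *	DVD_LU_SEND_AGID	obtain an authentication grant ID
 *	DVD_HOST_SEND_CHALLENGE	send the host challenge
 *	DVD_LU_SEND_KEY1	read the drive's key1
 *	DVD_LU_SEND_CHALLENGE	read the drive's challenge
 *	DVD_HOST_SEND_KEY2	send key2; on success the drive reports
 *				DVD_AUTH_ESTABLISHED
 */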
      
      #define DVD_READ_PHYSICAL_BUFSIZE (4 + 4 * 20)
      int
      dvd_read_physical(struct cd_softc *sc, union dvd_struct *s)
      {
              struct scsi_generic *cmd;
              struct dvd_layer *layer;
              struct scsi_xfer *xs;
              u_int8_t *buf, *bufp;
              int error, i;
      
              buf = dma_alloc(DVD_READ_PHYSICAL_BUFSIZE, PR_WAITOK | PR_ZERO);
              if (buf == NULL)
                      return (ENOMEM);
      
              xs = scsi_xs_get(sc->sc_link, SCSI_DATA_IN);
              if (xs == NULL) {
                      error = ENOMEM;
                      goto done;
              }
              xs->cmdlen = sizeof(*cmd);
              xs->data = buf;
              xs->datalen = DVD_READ_PHYSICAL_BUFSIZE;
              xs->timeout = 30000;
      
              cmd = xs->cmd;
              cmd->opcode = GPCMD_READ_DVD_STRUCTURE;
              cmd->bytes[6] = s->type;
              _lto2b(xs->datalen, &cmd->bytes[7]);
      
              cmd->bytes[5] = s->physical.layer_num;
      
              error = scsi_xs_sync(xs);
              scsi_xs_put(xs);
      
              if (error == 0) {
                      for (i = 0, bufp = &buf[4], layer = &s->physical.layer[0];
                          i < 4; i++, bufp += 20, layer++) {
                              bzero(layer, sizeof(*layer));
                              layer->book_version = bufp[0] & 0xf;
                              layer->book_type = bufp[0] >> 4;
                              layer->min_rate = bufp[1] & 0xf;
                              layer->disc_size = bufp[1] >> 4;
                              layer->layer_type = bufp[2] & 0xf;
                              layer->track_path = (bufp[2] >> 4) & 1;
                              layer->nlayers = (bufp[2] >> 5) & 3;
                              layer->track_density = bufp[3] & 0xf;
                              layer->linear_density = bufp[3] >> 4;
                              layer->start_sector = _4btol(&bufp[4]);
                              layer->end_sector = _4btol(&bufp[8]);
                              layer->end_sector_l0 = _4btol(&bufp[12]);
                              layer->bca = bufp[16] >> 7;
                      }
              }
      done:
              dma_free(buf, DVD_READ_PHYSICAL_BUFSIZE);
              return (error);
      }
      
      #define DVD_READ_COPYRIGHT_BUFSIZE        8
      int
      dvd_read_copyright(struct cd_softc *sc, union dvd_struct *s)
      {
              struct scsi_generic *cmd;
              struct scsi_xfer *xs;
              u_int8_t *buf;
              int error;
      
              buf = dma_alloc(DVD_READ_COPYRIGHT_BUFSIZE, PR_WAITOK | PR_ZERO);
              if (buf == NULL)
                      return (ENOMEM);
      
              xs = scsi_xs_get(sc->sc_link, SCSI_DATA_IN);
              if (xs == NULL) {
                      error = ENOMEM;
                      goto done;
              }
              xs->cmdlen = sizeof(*cmd);
              xs->data = buf;
              xs->datalen = DVD_READ_COPYRIGHT_BUFSIZE;
              xs->timeout = 30000;
      
              cmd = xs->cmd;
              cmd->opcode = GPCMD_READ_DVD_STRUCTURE;
              cmd->bytes[6] = s->type;
              _lto2b(xs->datalen, &cmd->bytes[7]);
      
              cmd->bytes[5] = s->copyright.layer_num;
      
              error = scsi_xs_sync(xs);
              scsi_xs_put(xs);
      
              if (error == 0) {
                      s->copyright.cpst = buf[4];
                      s->copyright.rmi = buf[5];
              }
      done:
              dma_free(buf, DVD_READ_COPYRIGHT_BUFSIZE);
              return (error);
      }
      
      int
      dvd_read_disckey(struct cd_softc *sc, union dvd_struct *s)
      {
              struct scsi_read_dvd_structure_data *buf;
              struct scsi_read_dvd_structure *cmd;
              struct scsi_xfer *xs;
              int error;
      
              buf = dma_alloc(sizeof(*buf), PR_WAITOK | PR_ZERO);
              if (buf == NULL)
                      return (ENOMEM);
      
              xs = scsi_xs_get(sc->sc_link, SCSI_DATA_IN);
              if (xs == NULL) {
                      error = ENOMEM;
                      goto done;
              }
              xs->cmdlen = sizeof(*cmd);
              xs->data = (void *)buf;
              xs->datalen = sizeof(*buf);
              xs->timeout = 30000;
      
              cmd = (struct scsi_read_dvd_structure *)xs->cmd;
              cmd->opcode = GPCMD_READ_DVD_STRUCTURE;
              cmd->format = s->type;
              cmd->agid = s->disckey.agid << 6;
              _lto2b(xs->datalen, cmd->length);
      
              error = scsi_xs_sync(xs);
              scsi_xs_put(xs);
      
              if (error == 0)
                      memcpy(s->disckey.value, buf->data, sizeof(s->disckey.value));
      done:
              dma_free(buf, sizeof(*buf));
              return (error);
      }
      
      #define DVD_READ_BCA_BUFLEN (4 + 188)
      
      int
      dvd_read_bca(struct cd_softc *sc, union dvd_struct *s)
      {
              struct scsi_generic *cmd;
              struct scsi_xfer *xs;
              u_int8_t *buf;
              int error;
      
              buf = dma_alloc(DVD_READ_BCA_BUFLEN, PR_WAITOK | PR_ZERO);
              if (buf == NULL)
                      return (ENOMEM);
      
              xs = scsi_xs_get(sc->sc_link, SCSI_DATA_IN);
              if (xs == NULL) {
                      error = ENOMEM;
                      goto done;
              }
              xs->cmdlen = sizeof(*cmd);
              xs->data = buf;
              xs->datalen = DVD_READ_BCA_BUFLEN;
              xs->timeout = 30000;
      
              cmd = xs->cmd;
              cmd->opcode = GPCMD_READ_DVD_STRUCTURE;
              cmd->bytes[6] = s->type;
              _lto2b(xs->datalen, &cmd->bytes[7]);
      
              error = scsi_xs_sync(xs);
              scsi_xs_put(xs);
      
	if (error == 0) {
		s->bca.len = _2btol(&buf[0]);
		if (s->bca.len < 12 || s->bca.len > 188)
			error = EIO;	/* implausible BCA length */
		else
			memcpy(s->bca.value, &buf[4], s->bca.len);
	}
      done:
              dma_free(buf, DVD_READ_BCA_BUFLEN);
              return (error);
      }
      
      int
      dvd_read_manufact(struct cd_softc *sc, union dvd_struct *s)
      {
              struct scsi_read_dvd_structure_data *buf;
              struct scsi_read_dvd_structure *cmd;
              struct scsi_xfer *xs;
              int error;
      
              buf = dma_alloc(sizeof(*buf), PR_WAITOK | PR_ZERO);
              if (buf == NULL)
                      return (ENOMEM);
      
              xs = scsi_xs_get(sc->sc_link, SCSI_DATA_IN);
              if (xs == NULL) {
                      error = ENOMEM;
                      goto done;
              }
              xs->cmdlen = sizeof(*cmd);
              xs->data = (void *)buf;
              xs->datalen = sizeof(*buf);
              xs->timeout = 30000;
      
              cmd = (struct scsi_read_dvd_structure *)xs->cmd;
              cmd->opcode = GPCMD_READ_DVD_STRUCTURE;
              cmd->format = s->type;
              _lto2b(xs->datalen, cmd->length);
      
              error = scsi_xs_sync(xs);
              scsi_xs_put(xs);
      
              if (error == 0) {
                      s->manufact.len = _2btol(buf->len);
                      if (s->manufact.len >= 0 && s->manufact.len <= 2048)
                              memcpy(s->manufact.value, buf->data, s->manufact.len);
                      else
                              error = EIO;
              }
      done:
              dma_free(buf, sizeof(*buf));
              return (error);
      }
      
      int
      dvd_read_struct(struct cd_softc *sc, union dvd_struct *s)
      {
      
              switch (s->type) {
              case DVD_STRUCT_PHYSICAL:
                      return (dvd_read_physical(sc, s));
              case DVD_STRUCT_COPYRIGHT:
                      return (dvd_read_copyright(sc, s));
              case DVD_STRUCT_DISCKEY:
                      return (dvd_read_disckey(sc, s));
              case DVD_STRUCT_BCA:
                      return (dvd_read_bca(sc, s));
              case DVD_STRUCT_MANUFACT:
                      return (dvd_read_manufact(sc, s));
              default:
                      return (EINVAL);
              }
      }
      
      int
      cd_interpret_sense(struct scsi_xfer *xs)
      {
              struct scsi_sense_data *sense = &xs->sense;
              struct scsi_link *link = xs->sc_link;
              u_int8_t skey = sense->flags & SSD_KEY;
              u_int8_t serr = sense->error_code & SSD_ERRCODE;
      
              if (((link->flags & SDEV_OPEN) == 0) ||
                  (serr != SSD_ERRCODE_CURRENT && serr != SSD_ERRCODE_DEFERRED))
                      return (scsi_interpret_sense(xs));
      
              /*
               * We do custom processing in cd for the unit becoming ready
               * case.  We do not allow xs->retries to be decremented on the
               * "Unit Becoming Ready" case. This is because CD drives
               * report "Unit Becoming Ready" when loading media and can
               * take a long time.  Rather than having a massive timeout for
               * all operations (which would cause other problems), we allow
	 * operations to wait (but be interruptible with Ctrl-C)
               * forever as long as the drive is reporting that it is
               * becoming ready.  All other cases of not being ready are
               * handled by the default handler.
               */
              switch(skey) {
              case SKEY_NOT_READY:
                      if ((xs->flags & SCSI_IGNORE_NOT_READY) != 0)
                              return (0);
                      if (ASC_ASCQ(sense) == SENSE_NOT_READY_BECOMING_READY) {
                              SC_DEBUG(link, SDEV_DB1, ("not ready: busy (%#x)\n",
                                  sense->add_sense_code_qual));
                              /* don't count this as a retry */
                              xs->retries++;
                              return (scsi_delay(xs, 1));
                      }
                      break;
              /* XXX more to come here for a few other cases */
              default:
                      break;
              }
              return (scsi_interpret_sense(xs));
      }
      
      /*
       * Find out from the device what its capacity is.
       */
      u_int64_t
      cd_size(struct scsi_link *link, int flags, u_int32_t *blksize)
      {
              struct scsi_read_cap_data_16 *rdcap16;
              struct scsi_read_capacity_16 *cmd;
              struct scsi_read_cap_data *rdcap;
              struct scsi_read_capacity *cmd10;
              struct scsi_xfer *xs;
              u_int64_t max_addr;
              int error;
      
              if (blksize != NULL)
                      *blksize = 0;
      
              CLR(flags, SCSI_IGNORE_ILLEGAL_REQUEST);
      
              /*
               * Start with a READ CAPACITY(10).
               */
              rdcap = dma_alloc(sizeof(*rdcap), ((flags & SCSI_NOSLEEP) ?
                  PR_NOWAIT : PR_WAITOK) | PR_ZERO);
              if (rdcap == NULL)
                      return (0);
      
              xs = scsi_xs_get(link, flags | SCSI_DATA_IN | SCSI_SILENT);
              if (xs == NULL) {
                      dma_free(rdcap, sizeof(*rdcap));
                      return (0);
              }
              xs->cmdlen = sizeof(*cmd10);
              xs->data = (void *)rdcap;
              xs->datalen = sizeof(*rdcap);
              xs->timeout = 20000;
      
              cmd10 = (struct scsi_read_capacity *)xs->cmd;
              cmd10->opcode = READ_CAPACITY;
      
              error = scsi_xs_sync(xs);
              scsi_xs_put(xs);
      
              if (error) {
                      SC_DEBUG(link, SDEV_DB1, ("READ CAPACITY error (%#x)\n",
                          error));
                      dma_free(rdcap, sizeof(*rdcap));
                      return (0);
              }
      
              max_addr = _4btol(rdcap->addr);
              if (blksize != NULL)
                      *blksize = _4btol(rdcap->length);
              dma_free(rdcap, sizeof(*rdcap));
      
              if (SCSISPC(link->inqdata.version) < 3 && max_addr != 0xffffffff)
                      goto exit;
      
	/*
	 * SCSI-3 devices, or devices that returned the 0xffffffff
	 * sentinel (capacity too large for READ CAPACITY(10)), can try
	 * READ CAPACITY(16).
	 */
              rdcap16 = dma_alloc(sizeof(*rdcap16), ((flags & SCSI_NOSLEEP) ?
                  PR_NOWAIT : PR_WAITOK) | PR_ZERO);
              if (rdcap16 == NULL)
                      goto exit;
      
              xs = scsi_xs_get(link, flags | SCSI_DATA_IN | SCSI_SILENT);
              if (xs == NULL) {
                      dma_free(rdcap16, sizeof(*rdcap16));
                      goto exit;
              }
              xs->cmdlen = sizeof(*cmd);
              xs->data = (void *)rdcap16;
              xs->datalen = sizeof(*rdcap16);
              xs->timeout = 20000;
      
              cmd = (struct scsi_read_capacity_16 *)xs->cmd;
              cmd->opcode = READ_CAPACITY_16;
              cmd->byte2 = SRC16_SERVICE_ACTION;
              _lto4b(sizeof(*rdcap16), cmd->length);
      
              error = scsi_xs_sync(xs);
              scsi_xs_put(xs);
              if (error) {
                      SC_DEBUG(link, SDEV_DB1, ("READ CAPACITY 16 error (%#x)\n",
                          error));
                      dma_free(rdcap16, sizeof(*rdcap16));
                      goto exit;
              }
      
              max_addr = _8btol(rdcap16->addr);
              if (blksize != NULL)
                      *blksize = _4btol(rdcap16->length);
              /* XXX The other READ CAPACITY(16) info could be stored away. */
              dma_free(rdcap16, sizeof(*rdcap16));
      
              return (max_addr + 1);
      
      exit:
              /* Return READ CAPACITY 10 values. */
              if (max_addr != 0xffffffff)
                      return (max_addr + 1);
              else if (blksize != NULL)
                      *blksize = 0;
              return (0);
      }
      
      #if defined(__macppc__)
      int
      cd_eject(void)
      {
              struct cd_softc *sc;
              int error = 0;
      
              if (cd_cd.cd_ndevs == 0 || (sc = cd_cd.cd_devs[0]) == NULL)
                      return (ENXIO);
      
              if ((error = disk_lock(&sc->sc_dk)) != 0)
                      return (error);
      
              if (sc->sc_dk.dk_openmask == 0) {
                      sc->sc_link->flags |= SDEV_EJECTING;
      
                      scsi_prevent(sc->sc_link, PR_ALLOW,
                          SCSI_IGNORE_ILLEGAL_REQUEST | SCSI_IGNORE_NOT_READY |
                          SCSI_SILENT | SCSI_IGNORE_MEDIA_CHANGE);
                      sc->sc_link->flags &= ~SDEV_MEDIA_LOADED;
      
                      scsi_start(sc->sc_link, SSS_STOP|SSS_LOEJ, 0);
      
                      sc->sc_link->flags &= ~SDEV_EJECTING;
              }
              disk_unlock(&sc->sc_dk);
      
              return (error);
      }
      #endif
      /*        $OpenBSD: audio.c,v 1.180 2019/08/17 05:04:56 ratchov Exp $        */
      /*
       * Copyright (c) 2015 Alexandre Ratchov <alex@caoua.org>
       *
       * Permission to use, copy, modify, and distribute this software for any
       * purpose with or without fee is hereby granted, provided that the above
       * copyright notice and this permission notice appear in all copies.
       *
       * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
       * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
       * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
       * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
       * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
       * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
       * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
       */
      #include <sys/param.h>
      #include <sys/fcntl.h>
      #include <sys/systm.h>
      #include <sys/ioctl.h>
      #include <sys/conf.h>
      #include <sys/poll.h>
      #include <sys/kernel.h>
      #include <sys/task.h>
      #include <sys/vnode.h>
      #include <sys/malloc.h>
      #include <sys/device.h>
      #include <sys/audioio.h>
      #include <dev/audio_if.h>
      #include <dev/mulaw.h>
      #include "audio.h"
      #include "wskbd.h"
      
      #ifdef AUDIO_DEBUG
      #define DPRINTF(...)                                \
              do {                                        \
                      if (audio_debug)                \
                              printf(__VA_ARGS__);        \
              } while(0)
      #define DPRINTFN(n, ...)                        \
              do {                                        \
                      if (audio_debug > (n))                \
                              printf(__VA_ARGS__);        \
              } while(0)
      #else
      #define DPRINTF(...) do {} while(0)
      #define DPRINTFN(n, ...) do {} while(0)
      #endif
      
      #define DEVNAME(sc)                ((sc)->dev.dv_xname)
      #define AUDIO_UNIT(n)                (minor(n) & 0x0f)
      #define AUDIO_DEV(n)                (minor(n) & 0xf0)
      #define AUDIO_DEV_AUDIO                0        /* minor of /dev/audio0 */
      #define AUDIO_DEV_MIXER                0x10        /* minor of /dev/mixer0 */
      #define AUDIO_DEV_AUDIOCTL        0xc0        /* minor of /dev/audioctl */
      #define AUDIO_BUFSZ                65536        /* buffer size in bytes */
      
      /*
       * mixer entries added by the audio(4) layer
       */
      #define MIXER_RECORD                        0        /* record class */
      #define MIXER_RECORD_ENABLE                1        /* record.enable control */
      #define  MIXER_RECORD_ENABLE_OFF        0        /* record.enable=off value */
      #define  MIXER_RECORD_ENABLE_ON                1        /* record.enable=on value */
      #define  MIXER_RECORD_ENABLE_SYSCTL        2        /* record.enable=sysctl val */
      
      /*
       * dma buffer
       */
      struct audio_buf {
              unsigned char *data;                /* DMA memory block */
              size_t datalen;                        /* size of DMA memory block */
              size_t len;                        /* size of DMA FIFO */
              size_t start;                        /* first byte used in the FIFO */
              size_t used;                        /* bytes used in the FIFO */
              size_t blksz;                        /* DMA block size */
              unsigned int nblks;                /* number of blocks */
	struct selinfo sel;		/* to record & wake up poll(2) sleepers */
              unsigned int pos;                /* bytes transferred */
              unsigned int xrun;                /* bytes lost by xruns */
              int blocking;                        /* read/write blocking */
      };
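
/*
 * The FIFO contents are the "used" bytes starting at offset "start",
 * wrapping around the end of the "len"-byte buffer. For example, with
 * len = 8, start = 6 and used = 4, the data occupies offsets 6, 7, 0
 * and 1; a reader first gets the contiguous 2-byte chunk at offset 6.
 */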
      
      #if NWSKBD > 0
      struct wskbd_vol
      {
              int val;                        /* index of the value control */
              int mute;                        /* index of the mute control */
              int step;                        /* increment/decrement step */
              int nch;                        /* channels in the value control */
              int val_pending;                /* pending change of val */
              int mute_pending;                /* pending change of mute */
      #define WSKBD_MUTE_TOGGLE        1
      #define WSKBD_MUTE_DISABLE        2
      #define WSKBD_MUTE_ENABLE        3
      };
      #endif
      
      /*
       * device structure
       */
      struct audio_softc {
              struct device dev;
              struct audio_hw_if *ops;        /* driver funcs */
              void *arg;                        /* first arg to driver funcs */
              int mode;                        /* bitmask of AUMODE_* */
              int quiesce;                        /* device suspended */
              struct audio_buf play, rec;
              unsigned int sw_enc;                /* user exposed AUDIO_ENCODING_* */
              unsigned int hw_enc;                /* hardware AUDIO_ENCODING_* */
              unsigned int bits;                /* bits per sample */
              unsigned int bps;                /* bytes-per-sample */
	unsigned int msb;		/* samples are MSB aligned */
              unsigned int rate;                /* rate in Hz */
              unsigned int round;                /* block size in frames */
              unsigned int pchan, rchan;        /* number of channels */
              unsigned char silence[4];        /* a sample of silence */
              int pause;                        /* not trying to start DMA */
	int active;			/* DMA in progress */
              int offs;                        /* offset between play & rec dir */
              void (*conv_enc)(unsigned char *, int);        /* encode to native */
              void (*conv_dec)(unsigned char *, int);        /* decode to user */
              struct mixer_ctrl *mix_ents;        /* mixer state for suspend/resume */
              int mix_nent;                        /* size of mixer state */
      #if NWSKBD > 0
              struct wskbd_vol spkr, mic;
              struct task wskbd_task;
      #endif
              int record_enable;                /* mixer record.enable value */
      };
      
      int audio_match(struct device *, void *, void *);
      void audio_attach(struct device *, struct device *, void *);
      int audio_activate(struct device *, int);
      int audio_detach(struct device *, int);
      void audio_pintr(void *);
      void audio_rintr(void *);
      #if NWSKBD > 0
      void wskbd_mixer_init(struct audio_softc *);
      void wskbd_mixer_cb(void *);
      #endif
      
      const struct cfattach audio_ca = {
              sizeof(struct audio_softc), audio_match, audio_attach,
              audio_detach, audio_activate
      };
      
      struct cfdriver audio_cd = {
              NULL, "audio", DV_DULL
      };
      
      /*
       * This mutex protects data structures (including registers on the
       * sound-card) that are manipulated by both the interrupt handler and
       * syscall code-paths.
       *
 * Note that driver methods may sleep (e.g. in malloc); consequently the
 * audio layer calls them with the mutex unlocked. Driver methods are
 * responsible for locking the mutex whenever they manipulate data used
 * by the interrupt handler, as interrupts may occur.
 *
 * Similarly, the driver is responsible for locking the mutex in its
 * interrupt handler and for calling the audio layer call-backs (i.e.
 * audio_{p,r}intr()) with the mutex locked.
       */
      struct mutex audio_lock = MUTEX_INITIALIZER(IPL_AUDIO);
      
/*
 * Global flag controlling whether audio recording is enabled when the
 * mixerctl setting is record.enable=sysctl
 */
      int audio_record_enable = 0;
      
      #ifdef AUDIO_DEBUG
      /*
       * 0 - nothing, as if AUDIO_DEBUG isn't defined
       * 1 - initialisations & setup
       * 2 - blocks & interrupts
       */
      int audio_debug = 1;
      #endif
      
      unsigned int
      audio_gcd(unsigned int a, unsigned int b)
      {
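	/* Euclid's algorithm; e.g. audio_gcd(960, 44100) = 60 */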
              unsigned int r;
      
              while (b > 0) {
                      r = a % b;
                      a = b;
                      b = r;
              }
              return a;
      }
      
      int
      audio_buf_init(struct audio_softc *sc, struct audio_buf *buf, int dir)
      {
              if (sc->ops->round_buffersize) {
                      buf->datalen = sc->ops->round_buffersize(sc->arg,
                          dir, AUDIO_BUFSZ);
              } else
                      buf->datalen = AUDIO_BUFSZ;
              if (sc->ops->allocm) {
                      buf->data = sc->ops->allocm(sc->arg, dir, buf->datalen,
                          M_DEVBUF, M_WAITOK);
              } else
                      buf->data = malloc(buf->datalen, M_DEVBUF, M_WAITOK);
              if (buf->data == NULL)
                      return ENOMEM;
              return 0;
      }
      
      void
      audio_buf_done(struct audio_softc *sc, struct audio_buf *buf)
      {
              if (sc->ops->freem)
                      sc->ops->freem(sc->arg, buf->data, M_DEVBUF);
              else
                      free(buf->data, M_DEVBUF, buf->datalen);
      }
      
/*
 * return the reader pointer and the number of contiguous bytes
 * readable at that position (the FIFO may wrap)
 */
      unsigned char *
      audio_buf_rgetblk(struct audio_buf *buf, size_t *rsize)
      {
              size_t count;
      
              count = buf->len - buf->start;
              if (count > buf->used)
                      count = buf->used;
              *rsize = count;
              return buf->data + buf->start;
      }
      
      /*
       * discard "count" bytes at the start position.
       */
      void
      audio_buf_rdiscard(struct audio_buf *buf, size_t count)
      {
      #ifdef AUDIO_DEBUG
              if (count > buf->used) {
                      panic("audio_buf_rdiscard: bad count = %zu, "
                          "start = %zu, used = %zu\n", count, buf->start, buf->used);
              }
      #endif
              buf->used -= count;
              buf->start += count;
              if (buf->start >= buf->len)
                      buf->start -= buf->len;
      }
      
      /*
       * advance the writer pointer by "count" bytes
       */
      void
      audio_buf_wcommit(struct audio_buf *buf, size_t count)
      {
      #ifdef AUDIO_DEBUG
              if (count > (buf->len - buf->used)) {
                      panic("audio_buf_wcommit: bad count = %zu, "
                          "start = %zu, used = %zu\n", count, buf->start, buf->used);
              }
      #endif
              buf->used += count;
      }
      
/*
 * get the writer pointer and the number of contiguous bytes writable
 * at that position (the FIFO may wrap)
 */
      unsigned char *
      audio_buf_wgetblk(struct audio_buf *buf, size_t *rsize)
      {
              size_t end, avail, count;
      
              end = buf->start + buf->used;
              if (end >= buf->len)
                      end -= buf->len;
              avail = buf->len - buf->used;
              count = buf->len - end;
              if (count > avail)
                      count = avail;
              *rsize = count;
              return buf->data + end;
      }
      
      void
      audio_calc_sil(struct audio_softc *sc)
      {
              unsigned char *q;
              unsigned int s, i;
              int d, e;
      
              e = sc->sw_enc;
      #ifdef AUDIO_DEBUG
              switch (e) {
              case AUDIO_ENCODING_SLINEAR_LE:
              case AUDIO_ENCODING_ULINEAR_LE:
              case AUDIO_ENCODING_SLINEAR_BE:
              case AUDIO_ENCODING_ULINEAR_BE:
                      break;
              default:
                      printf("%s: unhandled play encoding %d\n", DEVNAME(sc), e);
                      memset(sc->silence, 0, sc->bps);
                      return;
              }
      #endif
              if (e == AUDIO_ENCODING_SLINEAR_BE || e == AUDIO_ENCODING_ULINEAR_BE) {
                      d = -1;
                      q = sc->silence + sc->bps - 1;
              } else {
                      d = 1;
                      q = sc->silence;
              }
              if (e == AUDIO_ENCODING_SLINEAR_LE || e == AUDIO_ENCODING_SLINEAR_BE) {
                      s = 0;
              } else {
                      s = 0x80000000;
                      if (sc->msb)
                              s >>= 32 - 8 * sc->bps;
                      else
                              s >>= 32 - sc->bits;
              }
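	/*
	 * e.g. 16-bit unsigned MSB-aligned samples give s = 0x8000,
	 * stored on a little-endian encoding as the bytes 0x00, 0x80;
	 * signed linear silence is all zero bytes
	 */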
              for (i = 0; i < sc->bps; i++) {
                      *q = s;
                      q += d;
                      s >>= 8;
              }
              if (sc->conv_enc)
                      sc->conv_enc(sc->silence, sc->bps);
      }
      
      void
      audio_fill_sil(struct audio_softc *sc, unsigned char *ptr, size_t count)
      {
              unsigned char *q, *p;
              size_t i, j;
      
              q = ptr;
              for (j = count / sc->bps; j > 0; j--) {
                      p = sc->silence;
                      for (i = sc->bps; i > 0; i--)
                              *q++ = *p++;
              }
      }
      
      void
      audio_clear(struct audio_softc *sc)
      {
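	/*
	 * reset the play and/or rec FIFOs to the empty state and
	 * overwrite their DMA memory with silence
	 */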
              if (sc->mode & AUMODE_PLAY) {
                      sc->play.used = sc->play.start = 0;
                      sc->play.pos = sc->play.xrun = 0;
                      audio_fill_sil(sc, sc->play.data, sc->play.len);
              }
              if (sc->mode & AUMODE_RECORD) {
                      sc->rec.used = sc->rec.start = 0;
                      sc->rec.pos = sc->rec.xrun = 0;
                      audio_fill_sil(sc, sc->rec.data, sc->rec.len);
              }
      }
      
      /*
       * called whenever a block is consumed by the driver
       */
      void
      audio_pintr(void *addr)
      {
              struct audio_softc *sc = addr;
              unsigned char *ptr;
              size_t count;
              int error, nblk, todo;
      
              MUTEX_ASSERT_LOCKED(&audio_lock);
              if (!(sc->mode & AUMODE_PLAY) || !sc->active) {
                      printf("%s: play interrupt but not playing\n", DEVNAME(sc));
                      return;
              }
              if (sc->quiesce) {
                      DPRINTF("%s: quiesced, skipping play intr\n", DEVNAME(sc));
                      return;
              }
      
	/*
	 * check if the record pointer wrapped; see the explanation
	 * in audio_rintr()
	 */
              if ((sc->mode & AUMODE_RECORD) && sc->ops->underrun == NULL) {
                      sc->offs--;
                      nblk = sc->rec.len / sc->rec.blksz;
                      todo = -sc->offs;
                      if (todo >= nblk) {
                              todo -= todo % nblk;
                              DPRINTFN(1, "%s: rec ptr wrapped, moving %d blocks\n",
                                  DEVNAME(sc), todo);
                              while (todo-- > 0)
                                      audio_rintr(sc);
                      }
              }
      
              sc->play.pos += sc->play.blksz;
              if (!sc->ops->underrun) {
                      audio_fill_sil(sc, sc->play.data + sc->play.start,
                          sc->play.blksz);
              }
              audio_buf_rdiscard(&sc->play, sc->play.blksz);
              if (sc->play.used < sc->play.blksz) {
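		/* underrun: account for it and recycle a block of silence */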
                      DPRINTFN(1, "%s: play underrun\n", DEVNAME(sc));
                      sc->play.xrun += sc->play.blksz;
                      audio_buf_wcommit(&sc->play, sc->play.blksz);
                      if (sc->ops->underrun)
                              sc->ops->underrun(sc->arg);
              }
      
              DPRINTFN(1, "%s: play intr, used -> %zu, start -> %zu\n",
                  DEVNAME(sc), sc->play.used, sc->play.start);
      
              if (!sc->ops->trigger_output) {
                      ptr = audio_buf_rgetblk(&sc->play, &count);
                      error = sc->ops->start_output(sc->arg,
                          ptr, sc->play.blksz, audio_pintr, sc);
                      if (error) {
                              printf("%s: play restart failed: %d\n",
                                  DEVNAME(sc), error);
                      }
              }
      
              if (sc->play.used < sc->play.len) {
                      DPRINTFN(1, "%s: play wakeup, chan = %d\n",
                          DEVNAME(sc), sc->play.blocking);
                      if (sc->play.blocking) {
                              wakeup(&sc->play.blocking);
                              sc->play.blocking = 0;
                      }
                      selwakeup(&sc->play.sel);
              }
      }
      
      /*
       * called whenever a block is produced by the driver
       */
      void
      audio_rintr(void *addr)
      {
              struct audio_softc *sc = addr;
              unsigned char *ptr;
              size_t count;
              int error, nblk, todo;
      
              MUTEX_ASSERT_LOCKED(&audio_lock);
              if (!(sc->mode & AUMODE_RECORD) || !sc->active) {
                      printf("%s: rec interrupt but not recording\n", DEVNAME(sc));
                      return;
              }
              if (sc->quiesce) {
                      DPRINTF("%s: quiesced, skipping rec intr\n", DEVNAME(sc));
                      return;
              }
      
	/*
	 * Interrupts may be masked by other sub-systems for 320ms
	 * or more. During such a delay the hardware doesn't stop
	 * playing, so the play buffer pointers may wrap; this can't
	 * be detected and corrected by low level drivers. The record
	 * stream then gets ahead of the play stream, which userland
	 * sees as a hardware anomaly, causing programs to misbehave.
	 *
	 * We fix this by advancing the play position by an integer
	 * count of full buffers, so it catches up with the record
	 * position.
	 */
              if ((sc->mode & AUMODE_PLAY) && sc->ops->underrun == NULL) {
                      sc->offs++;
                      nblk = sc->play.len / sc->play.blksz;
                      todo = sc->offs;
                      if (todo >= nblk) {
                              todo -= todo % nblk;
                              DPRINTFN(1, "%s: play ptr wrapped, moving %d blocks\n",
                                  DEVNAME(sc), todo);
                              while (todo-- > 0)
                                      audio_pintr(sc);
                      }
              }
      
              sc->rec.pos += sc->rec.blksz;
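	/* if recording is disabled, overwrite the new block with silence */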
              if ((sc->record_enable == MIXER_RECORD_ENABLE_SYSCTL &&
                      !audio_record_enable) ||
                  sc->record_enable == MIXER_RECORD_ENABLE_OFF) {
                      ptr = audio_buf_wgetblk(&sc->rec, &count);
                      audio_fill_sil(sc, ptr, sc->rec.blksz);
              }
              audio_buf_wcommit(&sc->rec, sc->rec.blksz);
              if (sc->rec.used > sc->rec.len - sc->rec.blksz) {
                      DPRINTFN(1, "%s: rec overrun\n", DEVNAME(sc));
                      sc->rec.xrun += sc->rec.blksz;
                      audio_buf_rdiscard(&sc->rec, sc->rec.blksz);
              }
              DPRINTFN(1, "%s: rec intr, used -> %zu\n", DEVNAME(sc), sc->rec.used);
      
              if (!sc->ops->trigger_input) {
                      ptr = audio_buf_wgetblk(&sc->rec, &count);
                      error = sc->ops->start_input(sc->arg,
                          ptr, sc->rec.blksz, audio_rintr, sc);
                      if (error) {
                              printf("%s: rec restart failed: %d\n",
                                  DEVNAME(sc), error);
                      }
              }
      
              if (sc->rec.used > 0) {
                      DPRINTFN(1, "%s: rec wakeup, chan = %d\n",
                          DEVNAME(sc), sc->rec.blocking);
                      if (sc->rec.blocking) {
                              wakeup(&sc->rec.blocking);
                              sc->rec.blocking = 0;
                      }
                      selwakeup(&sc->rec.sel);
              }
      }
      
      int
      audio_start_do(struct audio_softc *sc)
      {
              int error;
              struct audio_params p;
              unsigned char *ptr;
              size_t count;
      
              DPRINTF("%s: starting\n", DEVNAME(sc));
      
              error = 0;
              sc->offs = 0;
              if (sc->mode & AUMODE_PLAY) {
                      if (sc->ops->trigger_output) {
                              p.encoding = sc->hw_enc;
                              p.precision = sc->bits;
                              p.bps = sc->bps;
                              p.msb = sc->msb;
                              p.sample_rate = sc->rate;
                              p.channels = sc->pchan;
                              error = sc->ops->trigger_output(sc->arg,
                                  sc->play.data,
                                  sc->play.data + sc->play.len,
                                  sc->play.blksz,
                                  audio_pintr, sc, &p);
                      } else {
                              mtx_enter(&audio_lock);
                              ptr = audio_buf_rgetblk(&sc->play, &count);
                              error = sc->ops->start_output(sc->arg,
                                  ptr, sc->play.blksz, audio_pintr, sc);
                              mtx_leave(&audio_lock);
                      }
                      if (error)
                              printf("%s: failed to start playback\n", DEVNAME(sc));
              }
              if (sc->mode & AUMODE_RECORD) {
                      if (sc->ops->trigger_input) {
                              p.encoding = sc->hw_enc;
                              p.precision = sc->bits;
                              p.bps = sc->bps;
                              p.msb = sc->msb;
                              p.sample_rate = sc->rate;
                              p.channels = sc->rchan;
                              error = sc->ops->trigger_input(sc->arg,
                                  sc->rec.data,
                                  sc->rec.data + sc->rec.len,
                                  sc->rec.blksz,
                                  audio_rintr, sc, &p);
                      } else {
                              mtx_enter(&audio_lock);
                              ptr = audio_buf_wgetblk(&sc->rec, &count);
                              error = sc->ops->start_input(sc->arg,
                                  ptr, sc->rec.blksz, audio_rintr, sc);
                              mtx_leave(&audio_lock);
                      }
                      if (error)
                              printf("%s: failed to start recording\n", DEVNAME(sc));
              }
              return error;
      }
      
      int
      audio_stop_do(struct audio_softc *sc)
      {
              if (sc->mode & AUMODE_PLAY)
                      sc->ops->halt_output(sc->arg);
              if (sc->mode & AUMODE_RECORD)
                      sc->ops->halt_input(sc->arg);
              DPRINTF("%s: stopped\n", DEVNAME(sc));
              return 0;
      }
      
      int
      audio_start(struct audio_softc *sc)
      {
              sc->active = 1;
              sc->play.xrun = sc->play.pos = sc->rec.xrun = sc->rec.pos = 0;
              return audio_start_do(sc);
      }
      
      int
      audio_stop(struct audio_softc *sc)
      {
              int error;
      
              error = audio_stop_do(sc);
              if (error)
                      return error;
              audio_clear(sc);
              sc->active = 0;
              return 0;
      }
      
      int
      audio_canstart(struct audio_softc *sc)
      {
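	/*
	 * DMA may start only if the device is neither paused nor already
	 * active, the record FIFO is empty and the play FIFO is full
	 */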
              if (sc->active || sc->pause)
                      return 0;
              if ((sc->mode & AUMODE_RECORD) && sc->rec.used != 0)
                      return 0;
              if ((sc->mode & AUMODE_PLAY) && sc->play.used != sc->play.len)
                      return 0;
              return 1;
      }
      
      int
      audio_setpar_blksz(struct audio_softc *sc)
      {
              unsigned int nr, np, max, min, mult;
              unsigned int blk_mult, blk_max;
      
	/*
	 * get the least multiplier that the number of frames per block
	 * must be a multiple of
	 */
              if (sc->ops->round_blocksize) {
                      blk_mult = sc->ops->round_blocksize(sc->arg, 1);
                      if (blk_mult == 0) {
                              printf("%s: 0x%x: bad block size multiplier\n",
                                  DEVNAME(sc), blk_mult);
                              return ENODEV;
                      }
              } else
                      blk_mult = 1;
              DPRINTF("%s: hw block size multiplier: %u\n", DEVNAME(sc), blk_mult);
              if (sc->mode & AUMODE_PLAY) {
                      np = blk_mult / audio_gcd(sc->pchan * sc->bps, blk_mult);
                      if (!(sc->mode & AUMODE_RECORD))
                              nr = np;
                      DPRINTF("%s: play number of frames multiplier: %u\n",
                          DEVNAME(sc), np);
              }
              if (sc->mode & AUMODE_RECORD) {
                      nr = blk_mult / audio_gcd(sc->rchan * sc->bps, blk_mult);
                      if (!(sc->mode & AUMODE_PLAY))
                              np = nr;
                      DPRINTF("%s: record number of frames multiplier: %u\n",
                          DEVNAME(sc), nr);
              }
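	/* least common multiple of nr and np */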
              mult = nr * np / audio_gcd(nr, np);
              DPRINTF("%s: least common number of frames multiplier: %u\n",
                  DEVNAME(sc), mult);
      
              /*
               * get minimum and maximum frames per block
               */
              if (sc->ops->round_blocksize)
                      blk_max = sc->ops->round_blocksize(sc->arg, AUDIO_BUFSZ);
              else
                      blk_max = AUDIO_BUFSZ;
              if ((sc->mode & AUMODE_PLAY) && blk_max > sc->play.datalen / 2)
                      blk_max = sc->play.datalen / 2;
              if ((sc->mode & AUMODE_RECORD) && blk_max > sc->rec.datalen / 2)
                      blk_max = sc->rec.datalen / 2;
              if (sc->mode & AUMODE_PLAY) {
                      np = blk_max / (sc->pchan * sc->bps);
                      if (!(sc->mode & AUMODE_RECORD))
                              nr = np;
              }
              if (sc->mode & AUMODE_RECORD) {
                      nr = blk_max / (sc->rchan * sc->bps);
                      if (!(sc->mode & AUMODE_PLAY))
                              np = nr;
              }
              max = np < nr ? np : nr;
              max -= max % mult;
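	/* at least 1ms worth of frames, rounded up to a multiple of mult */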
              min = sc->rate / 1000 + mult - 1;
              min -= min % mult;
              DPRINTF("%s: frame number range: %u..%u\n", DEVNAME(sc), min, max);
              if (max < min) {
                      printf("%s: %u: bad max frame number\n", DEVNAME(sc), max);
                      return EIO;
              }
      
	/*
	 * adjust the frames per block to match our constraints
	 */
              sc->round += mult / 2;
              sc->round -= sc->round % mult;
              if (sc->round > max)
                      sc->round = max;
              else if (sc->round < min)
                      sc->round = min;
      
              return 0;
      }
      
      int
      audio_setpar_nblks(struct audio_softc *sc)
      {
              unsigned int max;
      
              /*
               * set buffer size (number of blocks)
               */
              if (sc->mode & AUMODE_PLAY) {
                      max = sc->play.datalen / (sc->round * sc->pchan * sc->bps);
                      if (sc->play.nblks > max)
                              sc->play.nblks = max;
                      else if (sc->play.nblks < 2)
                              sc->play.nblks = 2;
              }
              if (sc->mode & AUMODE_RECORD) {
		/*
		 * for recording, the buffer size doesn't determine the
		 * latency (which is exactly one block), so use the
		 * maximum buffer size for maximum reliability during
		 * xruns
		 */
                      max = sc->rec.datalen / (sc->round * sc->rchan * sc->bps);
                      sc->rec.nblks = max;
              }
              return 0;
      }
      
      int
      audio_setpar(struct audio_softc *sc)
      {
              struct audio_params p, r;
              int error;
      
              DPRINTF("%s: setpar: req enc=%d bits=%d, bps=%d, msb=%d "
                  "rate=%d, pchan=%d, rchan=%d, round=%u, nblks=%d\n",
                  DEVNAME(sc), sc->sw_enc, sc->bits, sc->bps, sc->msb,
                  sc->rate, sc->pchan, sc->rchan, sc->round, sc->play.nblks);
      
              /*
               * check if requested parameters are in the allowed ranges
               */
              if (sc->mode & AUMODE_PLAY) {
                      if (sc->pchan < 1)
                              sc->pchan = 1;
                      else if (sc->pchan > 64)
                              sc->pchan = 64;
              }
              if (sc->mode & AUMODE_RECORD) {
                      if (sc->rchan < 1)
                              sc->rchan = 1;
                      else if (sc->rchan > 64)
                              sc->rchan = 64;
              }
              switch (sc->sw_enc) {
              case AUDIO_ENCODING_ULAW:
              case AUDIO_ENCODING_ALAW:
              case AUDIO_ENCODING_SLINEAR_LE:
              case AUDIO_ENCODING_SLINEAR_BE:
              case AUDIO_ENCODING_ULINEAR_LE:
              case AUDIO_ENCODING_ULINEAR_BE:
                      break;
              default:
                      sc->sw_enc = AUDIO_ENCODING_SLINEAR_LE;
              }
              if (sc->bits < 8)
                      sc->bits = 8;
              else if (sc->bits > 32)
                      sc->bits = 32;
              if (sc->bps < 1)
                      sc->bps = 1;
              else if (sc->bps > 4)
                      sc->bps = 4;
              if (sc->rate < 4000)
                      sc->rate = 4000;
              else if (sc->rate > 192000)
                      sc->rate = 192000;
      
              /*
               * copy into struct audio_params, required by drivers
               */
              p.encoding = r.encoding = sc->sw_enc;
              p.precision = r.precision = sc->bits;
              p.bps = r.bps = sc->bps;
              p.msb = r.msb = sc->msb;
              p.sample_rate = r.sample_rate = sc->rate;
              p.channels = sc->pchan;
              r.channels = sc->rchan;
      
              /*
               * set parameters
               */
              error = sc->ops->set_params(sc->arg, sc->mode, sc->mode, &p, &r);
              if (error)
                      return error;
              if (sc->mode == (AUMODE_PLAY | AUMODE_RECORD)) {
                      if (p.encoding != r.encoding ||
                          p.precision != r.precision ||
                          p.bps != r.bps ||
                          p.msb != r.msb ||
                          p.sample_rate != r.sample_rate) {
                              printf("%s: different play and record parameters "
                                  "returned by hardware\n", DEVNAME(sc));
                              return ENODEV;
                      }
              }
              if (sc->mode & AUMODE_PLAY) {
                      sc->hw_enc = p.encoding;
                      sc->bits = p.precision;
                      sc->bps = p.bps;
                      sc->msb = p.msb;
                      sc->rate = p.sample_rate;
                      sc->pchan = p.channels;
              }
              if (sc->mode & AUMODE_RECORD) {
                      sc->hw_enc = r.encoding;
                      sc->bits = r.precision;
                      sc->bps = r.bps;
                      sc->msb = r.msb;
                      sc->rate = r.sample_rate;
                      sc->rchan = r.channels;
              }
              if (sc->rate == 0 || sc->bps == 0 || sc->bits == 0) {
                      printf("%s: invalid parameters returned by hardware\n",
                          DEVNAME(sc));
                      return ENODEV;
              }
              if (sc->ops->commit_settings) {
                      error = sc->ops->commit_settings(sc->arg);
                      if (error)
                              return error;
              }
      
	/*
	 * set up conversions from/to exotic or legacy encodings, for
	 * drivers that don't support linear
	 */
              switch (sc->hw_enc) {
              case AUDIO_ENCODING_SLINEAR_LE:
              case AUDIO_ENCODING_SLINEAR_BE:
              case AUDIO_ENCODING_ULINEAR_LE:
              case AUDIO_ENCODING_ULINEAR_BE:
                      sc->sw_enc = sc->hw_enc;
                      sc->conv_dec = sc->conv_enc = NULL;
                      break;
              case AUDIO_ENCODING_ULAW:
      #if BYTE_ORDER == LITTLE_ENDIAN
                      sc->sw_enc = AUDIO_ENCODING_SLINEAR_LE;
      #else
                      sc->sw_enc = AUDIO_ENCODING_SLINEAR_BE;
      #endif
                      if (sc->bits == 8) {
                              sc->conv_enc = slinear8_to_mulaw;
                              sc->conv_dec = mulaw_to_slinear8;
                      } else if (sc->bits == 24) {
                              sc->conv_enc = slinear24_to_mulaw24;
                              sc->conv_dec = mulaw24_to_slinear24;
                      } else {
                              sc->sw_enc = sc->hw_enc;
                              sc->conv_dec = sc->conv_enc = NULL;
                      }
                      break;
              default:
                      printf("%s: setpar: enc = %d, bits = %d: emulation skipped\n",
                          DEVNAME(sc), sc->hw_enc, sc->bits);
                      sc->sw_enc = sc->hw_enc;
                      sc->conv_dec = sc->conv_enc = NULL;
              }
              audio_calc_sil(sc);
      
              error = audio_setpar_blksz(sc);
              if (error)
                      return error;
      
              error = audio_setpar_nblks(sc);
              if (error)
                      return error;
      
              /*
               * set buffer
               */
              if (sc->mode & AUMODE_PLAY) {
                      sc->play.blksz = sc->round * sc->pchan * sc->bps;
                      sc->play.len = sc->play.nblks * sc->play.blksz;
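		/* e.g. round = 960, pchan = 2, bps = 2 -> blksz = 3840 bytes */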
              }
              if (sc->mode & AUMODE_RECORD) {
                      sc->rec.blksz = sc->round * sc->rchan * sc->bps;
                      sc->rec.len = sc->rec.nblks * sc->rec.blksz;
              }
      
              DPRINTF("%s: setpar: new enc=%d bits=%d, bps=%d, msb=%d "
                  "rate=%d, pchan=%d, rchan=%d, round=%u, nblks=%d\n",
                  DEVNAME(sc), sc->sw_enc, sc->bits, sc->bps, sc->msb,
                  sc->rate, sc->pchan, sc->rchan, sc->round, sc->play.nblks);
              return 0;
      }
      
      int
      audio_ioc_start(struct audio_softc *sc)
      {
              if (!sc->pause) {
                      DPRINTF("%s: can't start: already started\n", DEVNAME(sc));
                      return EBUSY;
              }
              if ((sc->mode & AUMODE_PLAY) && sc->play.used != sc->play.len) {
                      DPRINTF("%s: play buffer not ready\n", DEVNAME(sc));
                      return EBUSY;
              }
              if ((sc->mode & AUMODE_RECORD) && sc->rec.used != 0) {
                      DPRINTF("%s: record buffer not ready\n", DEVNAME(sc));
                      return EBUSY;
              }
              sc->pause = 0;
              return audio_start(sc);
      }
      
      int
      audio_ioc_stop(struct audio_softc *sc)
      {
              if (sc->pause) {
                      DPRINTF("%s: can't stop: not started\n", DEVNAME(sc));
                      return EBUSY;
              }
              sc->pause = 1;
              if (sc->active)
                      return audio_stop(sc);
              return 0;
      }
      
      int
      audio_ioc_getpar(struct audio_softc *sc, struct audio_swpar *p)
      {
              p->rate = sc->rate;
              p->sig = sc->sw_enc == AUDIO_ENCODING_SLINEAR_LE ||
                  sc->sw_enc == AUDIO_ENCODING_SLINEAR_BE;
              p->le = sc->sw_enc == AUDIO_ENCODING_SLINEAR_LE ||
                  sc->sw_enc == AUDIO_ENCODING_ULINEAR_LE;
              p->bits = sc->bits;
              p->bps = sc->bps;
              p->msb = sc->msb;
              p->pchan = sc->pchan;
              p->rchan = sc->rchan;
              p->nblks = sc->play.nblks;
              p->round = sc->round;
              return 0;
      }
      
      int
      audio_ioc_setpar(struct audio_softc *sc, struct audio_swpar *p)
      {
              int error, le, sig;
      
              if (sc->active) {
                      DPRINTF("%s: can't change params during dma\n",
                          DEVNAME(sc));
                      return EBUSY;
              }
      
              /*
               * copy desired parameters into the softc structure
               */
              if (p->sig != ~0U || p->le != ~0U || p->bits != ~0U) {
                      sig = 1;
                      le = (BYTE_ORDER == LITTLE_ENDIAN);
                      sc->bits = 16;
                      sc->bps = 2;
                      sc->msb = 1;
                      if (p->sig != ~0U)
                              sig = p->sig;
                      if (p->le != ~0U)
                              le = p->le;
                      if (p->bits != ~0U) {
                              sc->bits = p->bits;
                              sc->bps = sc->bits <= 8 ?
                                  1 : (sc->bits <= 16 ? 2 : 4);
                              if (p->bps != ~0U)
                                      sc->bps = p->bps;
                              if (p->msb != ~0U)
                                      sc->msb = p->msb ? 1 : 0;
                      }
                      sc->sw_enc = (sig) ?
                          (le ? AUDIO_ENCODING_SLINEAR_LE :
                              AUDIO_ENCODING_SLINEAR_BE) :
                          (le ? AUDIO_ENCODING_ULINEAR_LE :
                              AUDIO_ENCODING_ULINEAR_BE);
              }
              if (p->rate != ~0)
                      sc->rate = p->rate;
              if (p->pchan != ~0)
                      sc->pchan = p->pchan;
              if (p->rchan != ~0)
                      sc->rchan = p->rchan;
              if (p->round != ~0)
                      sc->round = p->round;
              if (p->nblks != ~0)
                      sc->play.nblks = p->nblks;
      
	/*
	 * if the device is not opened for playback or recording, don't
	 * touch the hardware yet (e.g. if this is /dev/audioctlN)
	 */
              if (sc->mode == 0)
                      return 0;
      
	/*
	 * negotiate parameters with the hardware
	 */
              error = audio_setpar(sc);
              if (error)
                      return error;
              audio_clear(sc);
              if ((sc->mode & AUMODE_PLAY) && sc->ops->init_output) {
                      error = sc->ops->init_output(sc->arg,
                          sc->play.data, sc->play.len);
                      if (error)
                              return error;
              }
              if ((sc->mode & AUMODE_RECORD) && sc->ops->init_input) {
                      error = sc->ops->init_input(sc->arg,
                          sc->rec.data, sc->rec.len);
                      if (error)
                              return error;
              }
              return 0;
      }
      
      int
      audio_ioc_getstatus(struct audio_softc *sc, struct audio_status *p)
      {
              p->mode = sc->mode;
              p->pause = sc->pause;
              p->active = sc->active;
              return 0;
      }
      
      int
      audio_match(struct device *parent, void *match, void *aux)
      {
              struct audio_attach_args *sa = aux;
      
              return (sa->type == AUDIODEV_TYPE_AUDIO) ? 1 : 0;
      }
      
      void
      audio_attach(struct device *parent, struct device *self, void *aux)
      {
              struct audio_softc *sc = (void *)self;
              struct audio_attach_args *sa = aux;
              struct audio_hw_if *ops = sa->hwif;
              struct mixer_devinfo *mi;
              struct mixer_ctrl *ent;
              void *arg = sa->hdl;
              int error;
      
              printf("\n");
      
      #ifdef DIAGNOSTIC
              if (ops == 0 ||
                  ops->open == 0 ||
                  ops->close == 0 ||
                  ops->set_params == 0 ||
                  (ops->start_output == 0 && ops->trigger_output == 0) ||
                  (ops->start_input == 0 && ops->trigger_input == 0) ||
                  ops->halt_output == 0 ||
                  ops->halt_input == 0 ||
                  ops->set_port == 0 ||
                  ops->get_port == 0 ||
                  ops->query_devinfo == 0 ||
                  ops->get_props == 0) {
                      printf("%s: missing method\n", DEVNAME(sc));
                      sc->ops = 0;
                      return;
              }
      #endif
              sc->ops = ops;
              sc->arg = arg;
      
      #if NWSKBD > 0
              wskbd_mixer_init(sc);
      #endif /* NWSKBD > 0 */
      
              error = audio_buf_init(sc, &sc->play, AUMODE_PLAY);
              if (error) {
                      sc->ops = 0;
                      printf("%s: could not allocate play buffer\n", DEVNAME(sc));
                      return;
              }
              error = audio_buf_init(sc, &sc->rec, AUMODE_RECORD);
              if (error) {
                      audio_buf_done(sc, &sc->play);
                      sc->ops = 0;
                      printf("%s: could not allocate record buffer\n", DEVNAME(sc));
                      return;
              }
      
              /* set defaults */
      #if BYTE_ORDER == LITTLE_ENDIAN
              sc->sw_enc = AUDIO_ENCODING_SLINEAR_LE;
      #else
              sc->sw_enc = AUDIO_ENCODING_SLINEAR_BE;
      #endif
              sc->bits = 16;
              sc->bps = 2;
              sc->msb = 1;
              sc->rate = 48000;
              sc->pchan = 2;
              sc->rchan = 2;
              sc->round = 960;
              sc->play.nblks = 2;
              sc->play.pos = sc->play.xrun = sc->rec.pos = sc->rec.xrun = 0;
              sc->record_enable = MIXER_RECORD_ENABLE_SYSCTL;
      
	/*
	 * allocate an array of mixer_ctrl structures to save the
	 * mixer state, and prefill it.
	 */
      
              mi = malloc(sizeof(struct mixer_devinfo), M_TEMP, M_WAITOK);
      
              mi->index = 0;
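	/* first pass: count the mixer controls */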
              while (1) {
                      if (sc->ops->query_devinfo(sc->arg, mi) != 0)
                              break;
                      mi->index++;
              }
              sc->mix_nent = mi->index;
              sc->mix_ents = mallocarray(sc->mix_nent,
                  sizeof(struct mixer_ctrl), M_DEVBUF, M_WAITOK);
      
              ent = sc->mix_ents;
              mi->index = 0;
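	/* second pass: save each control's index, type and channel count */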
              while (1) {
                      if (sc->ops->query_devinfo(sc->arg, mi) != 0)
                              break;
                      switch (mi->type) {
                      case AUDIO_MIXER_VALUE:
                              ent->un.value.num_channels = mi->un.v.num_channels;
                              /* FALLTHROUGH */
                      case AUDIO_MIXER_SET:
                      case AUDIO_MIXER_ENUM:
                              ent->dev = mi->index;
                              ent->type = mi->type;
                      }
                      mi->index++;
                      ent++;
              }
      
              free(mi, M_TEMP, sizeof(struct mixer_devinfo));
      }
      
      int
      audio_activate(struct device *self, int act)
      {
              struct audio_softc *sc = (struct audio_softc *)self;
              int i;
      
              switch (act) {
              case DVACT_QUIESCE:
		/*
		 * Good drivers run the play and rec handlers from a
		 * single interrupt. Grab the lock to ensure both
		 * handlers see the same sc->quiesce value.
		 */
                      mtx_enter(&audio_lock);
                      sc->quiesce = 1;
                      mtx_leave(&audio_lock);
      
                      /*
                       * once sc->quiesce is set, interrupts may occur, but
                       * counters are not advanced and consequently processes
                       * keep sleeping.
                       *
                       * XXX: ensure read/write/ioctl don't start/stop
                       * DMA at the same time, this needs a "ready" condvar
                       */
                      if (sc->mode != 0 && sc->active)
                              audio_stop_do(sc);
      
                      /*
                       * save mixer state
                       */
                      for (i = 0; i != sc->mix_nent; i++)
                              sc->ops->get_port(sc->arg, sc->mix_ents + i);
      
                      DPRINTF("%s: quiesce: active = %d\n", DEVNAME(sc), sc->active);
                      break;
              case DVACT_WAKEUP:
                      DPRINTF("%s: wakeup: active = %d\n", DEVNAME(sc), sc->active);
      
                      /*
                       * restore mixer state
                       */
                      for (i = 0; i != sc->mix_nent; i++)
                              sc->ops->set_port(sc->arg, sc->mix_ents + i);
      
                      /*
                       * keep buffer usage the same, but set start pointer to
                       * the beginning of the buffer.
                       *
                       * No need to grab the audio_lock as DMA is stopped and
                       * this is the only thread running (caller ensures this)
                       */
                      sc->quiesce = 0;
                      wakeup(&sc->quiesce);
      
                      if (sc->mode != 0) {
                              if (audio_setpar(sc) != 0)
                                      break;
                              if (sc->mode & AUMODE_PLAY) {
                                      sc->play.start = 0;
                                      audio_fill_sil(sc, sc->play.data, sc->play.len);
                              }
                              if (sc->mode & AUMODE_RECORD) {
                                      sc->rec.start = sc->rec.len - sc->rec.used;
                                      audio_fill_sil(sc, sc->rec.data, sc->rec.len);
                              }
                              if (sc->active)
                                      audio_start_do(sc);
                      }
                      break;
              }
              return 0;
      }
      
      int
      audio_detach(struct device *self, int flags)
      {
              struct audio_softc *sc = (struct audio_softc *)self;
              int maj, mn;
      
              DPRINTF("%s: audio_detach: flags = %d\n", DEVNAME(sc), flags);
      
              wakeup(&sc->quiesce);
      
              /* locate the major number */
              for (maj = 0; maj < nchrdev; maj++)
                      if (cdevsw[maj].d_open == audioopen)
                              break;
	/*
	 * Nuke the vnodes of any open instances. This calls close(),
	 * but as close() uses device_lookup(), it returns ENXIO and
	 * does nothing.
	 */
              mn = self->dv_unit;
              vdevgone(maj, mn | AUDIO_DEV_AUDIO, mn | AUDIO_DEV_AUDIO, VCHR);
              vdevgone(maj, mn | AUDIO_DEV_AUDIOCTL, mn | AUDIO_DEV_AUDIOCTL, VCHR);
              vdevgone(maj, mn | AUDIO_DEV_MIXER, mn | AUDIO_DEV_MIXER, VCHR);
      
	/*
	 * As the close() method did nothing, quickly halt DMA (normally
	 * the parent is already gone, making the code below a no-op) and
	 * wake up userland processes blocked in read/write/ioctl, which
	 * then return EIO.
	 */
              if (sc->mode != 0) {
                      if (sc->active) {
                              wakeup(&sc->play.blocking);
                              selwakeup(&sc->play.sel);
                              wakeup(&sc->rec.blocking);
                              selwakeup(&sc->rec.sel);
                              audio_stop(sc);
                      }
                      sc->ops->close(sc->arg);
                      sc->mode = 0;
              }
      
              /* free resources */
              free(sc->mix_ents, M_DEVBUF, sc->mix_nent * sizeof(struct mixer_ctrl));
              audio_buf_done(sc, &sc->play);
              audio_buf_done(sc, &sc->rec);
              return 0;
      }
      
      int
      audio_submatch(struct device *parent, void *match, void *aux)
      {
              struct cfdata *cf = match;
      
              return (cf->cf_driver == &audio_cd);
      }
      
      struct device *
      audio_attach_mi(struct audio_hw_if *ops, void *arg, struct device *dev)
      {
              struct audio_attach_args aa;
      
              aa.type = AUDIODEV_TYPE_AUDIO;
              aa.hwif = ops;
              aa.hdl = arg;
      
	/*
	 * attach this driver to the caller (the hardware driver); this
	 * checks the kernel config and possibly calls audio_attach()
	 */
              return config_found_sm(dev, &aa, audioprint, audio_submatch);
      }
      
      int
      audioprint(void *aux, const char *pnp)
      {
              struct audio_attach_args *arg = aux;
              const char *type;
      
              if (pnp != NULL) {
                      switch (arg->type) {
                      case AUDIODEV_TYPE_AUDIO:
                              type = "audio";
                              break;
                      case AUDIODEV_TYPE_OPL:
                              type = "opl";
                              break;
                      case AUDIODEV_TYPE_MPU:
                              type = "mpu";
                              break;
                      default:
                              panic("audioprint: unknown type %d", arg->type);
                      }
                      printf("%s at %s", type, pnp);
              }
              return UNCONF;
      }
      
      int
      audio_open(struct audio_softc *sc, int flags)
      {
              int error;
              int props;
      
              if (sc->mode)
                      return EBUSY;
              error = sc->ops->open(sc->arg, flags);
              if (error)
                      return error;
              sc->active = 0;
              sc->pause = 1;
              sc->rec.blocking = 0;
              sc->play.blocking = 0;
              sc->mode = 0;
              if (flags & FWRITE)
                      sc->mode |= AUMODE_PLAY;
              if (flags & FREAD)
                      sc->mode |= AUMODE_RECORD;
              props = sc->ops->get_props(sc->arg);
              if (sc->mode == (AUMODE_PLAY | AUMODE_RECORD)) {
                      if (!(props & AUDIO_PROP_FULLDUPLEX)) {
                              error = ENOTTY;
                              goto bad;
                      }
                      if (sc->ops->setfd) {
                              error = sc->ops->setfd(sc->arg, 1);
                              if (error)
                                      goto bad;
                      }
              }
      
              if (sc->ops->speaker_ctl) {
                      /*
                       * XXX: what is this used for?
                       */
                      sc->ops->speaker_ctl(sc->arg,
                          (sc->mode & AUMODE_PLAY) ? SPKR_ON : SPKR_OFF);
              }
      
              error = audio_setpar(sc);
              if (error)
                      goto bad;
              audio_clear(sc);
      
              /*
               * allow read(2)/write(2) to automatically start DMA, without
               * the need for ioctl(), to make /dev/audio usable in scripts
               */
              sc->pause = 0;
              return 0;
      bad:
              sc->ops->close(sc->arg);
              sc->mode = 0;
              return error;
      }
      
      int
      audio_drain(struct audio_softc *sc)
      {
              int error, xrun;
              unsigned char *ptr;
              size_t count, bpf;
      
              DPRINTF("%s: drain: mode = %d, pause = %d, active = %d, used = %zu\n",
                  DEVNAME(sc), sc->mode, sc->pause, sc->active, sc->play.used);
              if (!(sc->mode & AUMODE_PLAY) || sc->pause)
                      return 0;
      
	/* discard any partial frame, as required by audio_fill_sil() */
              mtx_enter(&audio_lock);
              bpf = sc->pchan * sc->bps;
              sc->play.used -= sc->play.used % bpf;
              if (sc->play.used == 0) {
                      mtx_leave(&audio_lock);
                      return 0;
              }
      
              if (!sc->active) {
		/*
		 * DMA not started yet because the buffer wasn't full
		 * enough to start automatically. Pad it with silence
		 * and start now.
		 */
                      for (;;) {
                              ptr = audio_buf_wgetblk(&sc->play, &count);
                              if (count == 0)
                                      break;
                              audio_fill_sil(sc, ptr, count);
                              audio_buf_wcommit(&sc->play, count);
                      }
                      mtx_leave(&audio_lock);
                      error = audio_start(sc);
                      if (error)
                              return error;
                      mtx_enter(&audio_lock);
              }
      
              xrun = sc->play.xrun;
              while (sc->play.xrun == xrun) {
                      DPRINTF("%s: drain: used = %zu, xrun = %d\n",
                          DEVNAME(sc), sc->play.used, sc->play.xrun);
      
			/*
			 * use a 5 second timeout in case interrupts
			 * don't work; useful only for debugging drivers
			 */
                      sc->play.blocking = 1;
                      error = msleep(&sc->play.blocking, &audio_lock,
                          PWAIT | PCATCH, "au_dr", 5 * hz);
                      if (!(sc->dev.dv_flags & DVF_ACTIVE))
                              error = EIO;
                      if (error) {
                              DPRINTF("%s: drain, err = %d\n", DEVNAME(sc), error);
                              break;
                      }
              }
              mtx_leave(&audio_lock);
              return error;
      }
      
      int
      audio_close(struct audio_softc *sc)
      {
              audio_drain(sc);
              if (sc->active)
                      audio_stop(sc);
              sc->ops->close(sc->arg);
              sc->mode = 0;
              DPRINTF("%s: close: done\n", DEVNAME(sc));
              return 0;
      }
      
      int
      audio_read(struct audio_softc *sc, struct uio *uio, int ioflag)
      {
              unsigned char *ptr;
              size_t count;
              int error;
      
              DPRINTFN(1, "%s: read: resid = %zd\n", DEVNAME(sc), uio->uio_resid);
      
              /* block if quiesced */
              while (sc->quiesce)
                      tsleep(&sc->quiesce, 0, "au_qrd", 0);
      
              /* start automatically if audio_ioc_start() was never called */
              if (audio_canstart(sc)) {
                      error = audio_start(sc);
                      if (error)
                              return error;
              }
      
              mtx_enter(&audio_lock);
      
              /* if there is no data then sleep */
              while (sc->rec.used == 0) {
                      if (ioflag & IO_NDELAY) {
                              mtx_leave(&audio_lock);
                              return EWOULDBLOCK;
                      }
                      DPRINTFN(1, "%s: read sleep\n", DEVNAME(sc));
                      sc->rec.blocking = 1;
                      error = msleep(&sc->rec.blocking,
                          &audio_lock, PWAIT | PCATCH, "au_rd", 0);
                      if (!(sc->dev.dv_flags & DVF_ACTIVE))
                              error = EIO;
                      if (error) {
                              DPRINTF("%s: read woke up error = %d\n",
                                  DEVNAME(sc), error);
                              mtx_leave(&audio_lock);
                              return error;
                      }
              }
      
              /* at this stage, there is data to transfer */
              while (uio->uio_resid > 0 && sc->rec.used > 0) {
                      ptr = audio_buf_rgetblk(&sc->rec, &count);
                      if (count > uio->uio_resid)
                              count = uio->uio_resid;
                      mtx_leave(&audio_lock);
                      DPRINTFN(1, "%s: read: start = %zu, count = %zu\n",
                          DEVNAME(sc), ptr - sc->rec.data, count);
                      if (sc->conv_dec)
                              sc->conv_dec(ptr, count);
                      error = uiomove(ptr, count, uio);
                      if (error)
                              return error;
                      mtx_enter(&audio_lock);
                      audio_buf_rdiscard(&sc->rec, count);
              }
              mtx_leave(&audio_lock);
              return 0;
      }
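
/*
 * Illustrative recording sketch (the descriptor and buffer size are
 * assumptions): read(2) sleeps until recorded data is buffered, or
 * fails with EWOULDBLOCK under O_NONBLOCK while the ring is empty,
 * exactly as implemented above.
 *
 *	char buf[4096];
 *	ssize_t n;
 *
 *	while ((n = read(fd, buf, sizeof(buf))) > 0) {
 *		// consume n bytes of recorded samples
 *	}
 */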
      
      int
      audio_write(struct audio_softc *sc, struct uio *uio, int ioflag)
      {
              unsigned char *ptr;
              size_t count;
              int error;
      
        DPRINTFN(1, "%s: write: resid = %zd\n", DEVNAME(sc), uio->uio_resid);
      
              /* block if quiesced */
              while (sc->quiesce)
                      tsleep(&sc->quiesce, 0, "au_qwr", 0);
      
        /*
         * If the IO_NDELAY flag is set, check whether there is room in
         * the buffer for at least one byte; if there isn't, don't
         * start the write at all.
         */
              mtx_enter(&audio_lock);
              if (uio->uio_resid > 0 && (ioflag & IO_NDELAY)) {
                      if (sc->play.used == sc->play.len) {
                              mtx_leave(&audio_lock);
                              return EWOULDBLOCK;
                      }
              }
      
              while (uio->uio_resid > 0) {
                      while (1) {
                              ptr = audio_buf_wgetblk(&sc->play, &count);
                              if (count > 0)
                                      break;
                              if (ioflag & IO_NDELAY) {
                        /*
                         * At this stage at least one byte has already
                         * been moved, so we do not return EWOULDBLOCK.
                         */
                                      mtx_leave(&audio_lock);
                                      return 0;
                              }
                              DPRINTFN(1, "%s: write sleep\n", DEVNAME(sc));
                              sc->play.blocking = 1;
                              error = msleep(&sc->play.blocking,
                                  &audio_lock, PWAIT | PCATCH, "au_wr", 0);
                              if (!(sc->dev.dv_flags & DVF_ACTIVE))
                                      error = EIO;
                              if (error) {
                                      DPRINTF("%s: write woke up error = %d\n",
                                          DEVNAME(sc), error);
                                      mtx_leave(&audio_lock);
                                      return error;
                              }
                      }
                      if (count > uio->uio_resid)
                              count = uio->uio_resid;
                      mtx_leave(&audio_lock);
                      error = uiomove(ptr, count, uio);
                      if (error)
                        return error;
                      if (sc->conv_enc) {
                              sc->conv_enc(ptr, count);
                              DPRINTFN(1, "audio_write: converted count = %zu\n",
                                  count);
                      }
                      if (sc->ops->copy_output)
                              sc->ops->copy_output(sc->arg, count);
      
                      mtx_enter(&audio_lock);
                      audio_buf_wcommit(&sc->play, count);
      
                      /* start automatically if audio_ioc_start() was never called */
                      if (audio_canstart(sc)) {
                              mtx_leave(&audio_lock);
                              error = audio_start(sc);
                              if (error)
                                      return error;
                              mtx_enter(&audio_lock);
                      }
              }
              mtx_leave(&audio_lock);
              return 0;
      }
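
/*
 * Illustrative playback sketch: under O_NONBLOCK the code above
 * returns success once at least one byte was queued, so userland
 * sees a short write rather than EWOULDBLOCK.  Sketch only; buf and
 * len are assumptions.
 *
 *	size_t off = 0;
 *	ssize_t n;
 *
 *	while (off < len) {
 *		n = write(fd, buf + off, len - off);
 *		if (n == -1)
 *			break;	// EWOULDBLOCK only if nothing fit at all
 *		off += n;
 *	}
 */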
      
      int
      audio_getdev(struct audio_softc *sc, struct audio_device *adev)
      {
              memset(adev, 0, sizeof(struct audio_device));
              if (sc->dev.dv_parent == NULL)
                      return EIO;
              strlcpy(adev->name, sc->dev.dv_parent->dv_xname, MAX_AUDIO_DEV_LEN);
              return 0;
      }
      
      int
      audio_ioctl(struct audio_softc *sc, unsigned long cmd, void *addr)
      {
              struct audio_pos *ap;
              int error = 0;
      
              /* block if quiesced */
              while (sc->quiesce)
                      tsleep(&sc->quiesce, 0, "au_qio", 0);
      
              switch (cmd) {
              case FIONBIO:
                      /* All handled in the upper FS layer. */
                      break;
              case AUDIO_GETPOS:
                      mtx_enter(&audio_lock);
                      ap = (struct audio_pos *)addr;
                      ap->play_pos = sc->play.pos;
                      ap->play_xrun = sc->play.xrun;
                      ap->rec_pos = sc->rec.pos;
                      ap->rec_xrun = sc->rec.xrun;
                      mtx_leave(&audio_lock);
                      break;
              case AUDIO_START:
                      return audio_ioc_start(sc);
              case AUDIO_STOP:
                      return audio_ioc_stop(sc);
              case AUDIO_SETPAR:
                      error = audio_ioc_setpar(sc, (struct audio_swpar *)addr);
                      break;
              case AUDIO_GETPAR:
                      error = audio_ioc_getpar(sc, (struct audio_swpar *)addr);
                      break;
              case AUDIO_GETSTATUS:
                      error = audio_ioc_getstatus(sc, (struct audio_status *)addr);
                      break;
              case AUDIO_GETDEV:
                      error = audio_getdev(sc, (struct audio_device *)addr);
                      break;
              default:
                      DPRINTF("%s: unknown ioctl 0x%lx\n", DEVNAME(sc), cmd);
                      error = ENOTTY;
                      break;
              }
              return error;
      }
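
/*
 * Illustrative AUDIO_GETPOS sketch: the four counters copied out
 * above are maintained elsewhere in this file; userland can sample
 * them to track progress and xruns.  Printing the raw values is the
 * whole example; interpreting them is application-specific.
 *
 *	struct audio_pos ap;
 *
 *	if (ioctl(fd, AUDIO_GETPOS, &ap) == 0)
 *		printf("play %u (xrun %u), rec %u (xrun %u)\n",
 *		    ap.play_pos, ap.play_xrun, ap.rec_pos, ap.rec_xrun);
 */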
      
      int
      audio_mixer_devinfo(struct audio_softc *sc, struct mixer_devinfo *devinfo)
      {
              if (devinfo->index < sc->mix_nent)
                return sc->ops->query_devinfo(sc->arg, devinfo);
      
              devinfo->next = -1;
              devinfo->prev = -1;
              switch (devinfo->index - sc->mix_nent) {
              case MIXER_RECORD:
                      strlcpy(devinfo->label.name, AudioCrecord, MAX_AUDIO_DEV_LEN);
                      devinfo->type = AUDIO_MIXER_CLASS;
                      devinfo->mixer_class = -1;
                      break;
              case MIXER_RECORD_ENABLE:
                      strlcpy(devinfo->label.name, "enable", MAX_AUDIO_DEV_LEN);
                      devinfo->type = AUDIO_MIXER_ENUM;
                      devinfo->mixer_class = MIXER_RECORD + sc->mix_nent;
                      devinfo->un.e.num_mem = 3;
                      devinfo->un.e.member[0].ord = MIXER_RECORD_ENABLE_OFF;
                      strlcpy(devinfo->un.e.member[0].label.name, "off",
                          MAX_AUDIO_DEV_LEN);
                      devinfo->un.e.member[1].ord = MIXER_RECORD_ENABLE_ON;
                      strlcpy(devinfo->un.e.member[1].label.name, "on",
                          MAX_AUDIO_DEV_LEN);
                      devinfo->un.e.member[2].ord = MIXER_RECORD_ENABLE_SYSCTL;
                      strlcpy(devinfo->un.e.member[2].label.name, "sysctl",
                          MAX_AUDIO_DEV_LEN);
                      break;
              default:
                      return EINVAL;
              }
      
              return 0;
      }
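
/*
 * Illustrative enumeration sketch: userland walks indexes on the
 * mixer device until the EINVAL above ends the scan; the two
 * virtual entries (the record class and "enable") follow the
 * sc->mix_nent hardware controls.  The descriptor fd is assumed to
 * reference the mixer device.
 *
 *	struct mixer_devinfo mi;
 *
 *	for (mi.index = 0; ioctl(fd, AUDIO_MIXER_DEVINFO, &mi) == 0;
 *	    mi.index++)
 *		printf("%d: %s\n", mi.index, mi.label.name);
 */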
      
      int
      audio_mixer_read(struct audio_softc *sc, struct mixer_ctrl *c)
      {
              if (c->dev < sc->mix_nent)
                      return sc->ops->get_port(sc->arg, c);
      
              switch (c->dev - sc->mix_nent) {
              case MIXER_RECORD:
                      return EBADF;
              case MIXER_RECORD_ENABLE:
                      c->un.ord = sc->record_enable;
                      break;
              default:
                      return EINVAL;
              }
      
              return 0;
      }
      
      int
      audio_mixer_write(struct audio_softc *sc, struct mixer_ctrl *c, struct proc *p)
      {
              int error;
      
              if (c->dev < sc->mix_nent) {
                      error = sc->ops->set_port(sc->arg, c);
                      if (error)
                              return error;
                      if (sc->ops->commit_settings)
                              return sc->ops->commit_settings(sc->arg);
                      return 0;
              }
      
              switch (c->dev - sc->mix_nent) {
              case MIXER_RECORD:
                      return EBADF;
              case MIXER_RECORD_ENABLE:
                      switch (c->un.ord) {
                      case MIXER_RECORD_ENABLE_OFF:
                      case MIXER_RECORD_ENABLE_ON:
                      case MIXER_RECORD_ENABLE_SYSCTL:
                              break;
                      default:
                              return EINVAL;
                      }
                      if (suser(p) == 0)
                              sc->record_enable = c->un.ord;
                      break;
              default:
                      return EINVAL;
              }
      
              return 0;
      }
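
/*
 * Illustrative sketch for the virtual record-enable control: it is
 * written as an ENUM ordinal and, per the suser() check above, only
 * root's value takes effect (the call still succeeds for others).
 * The index is hypothetical; in practice it is located with
 * AUDIO_MIXER_DEVINFO as sketched earlier.
 *
 *	struct mixer_ctrl c;
 *
 *	c.dev = record_enable_index;	// hypothetical, from enumeration
 *	c.type = AUDIO_MIXER_ENUM;
 *	c.un.ord = MIXER_RECORD_ENABLE_ON;
 *	ioctl(fd, AUDIO_MIXER_WRITE, &c);
 */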
      
      int
      audio_ioctl_mixer(struct audio_softc *sc, unsigned long cmd, void *addr,
              struct proc *p)
      {
              /* block if quiesced */
              while (sc->quiesce)
                      tsleep(&sc->quiesce, 0, "mix_qio", 0);
      
              switch (cmd) {
              case FIONBIO:
                      /* All handled in the upper FS layer. */
                      break;
              case AUDIO_MIXER_DEVINFO:
                      return audio_mixer_devinfo(sc, addr);
              case AUDIO_MIXER_READ:
                      return audio_mixer_read(sc, addr);
              case AUDIO_MIXER_WRITE:
                      return audio_mixer_write(sc, addr, p);
              default:
                      return ENOTTY;
              }
              return 0;
      }
      
      int
      audio_poll(struct audio_softc *sc, int events, struct proc *p)
      {
              int revents = 0;
      
              mtx_enter(&audio_lock);
              if ((sc->mode & AUMODE_RECORD) && sc->rec.used > 0)
                      revents |= events & (POLLIN | POLLRDNORM);
              if ((sc->mode & AUMODE_PLAY) && sc->play.used < sc->play.len)
                      revents |= events & (POLLOUT | POLLWRNORM);
              if (revents == 0) {
                      if (events & (POLLIN | POLLRDNORM))
                              selrecord(p, &sc->rec.sel);
                      if (events & (POLLOUT | POLLWRNORM))
                              selrecord(p, &sc->play.sel);
              }
              mtx_leave(&audio_lock);
              return revents;
      }
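
/*
 * Illustrative poll(2) sketch: per the conditions above, POLLIN is
 * reported once recorded data is buffered and POLLOUT while the
 * play ring has free space; otherwise the caller is put on the
 * corresponding selinfo queue.
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLOUT };
 *
 *	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLOUT)) {
 *		// write(2) will now accept at least one byte
 *	}
 */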
      
      int
      audioopen(dev_t dev, int flags, int mode, struct proc *p)
{
              struct audio_softc *sc;
              int error;
      
              sc = (struct audio_softc *)device_lookup(&audio_cd, AUDIO_UNIT(dev));
        if (sc == NULL)
                      return ENXIO;
              if (sc->ops == NULL)
                      error = ENXIO;
              else {
                      switch (AUDIO_DEV(dev)) {
                      case AUDIO_DEV_AUDIO:
                              error = audio_open(sc, flags);
                              break;
                      case AUDIO_DEV_AUDIOCTL:
                      case AUDIO_DEV_MIXER:
                              error = 0;
                              break;
                      default:
                              error = ENXIO;
                      }
              }
              device_unref(&sc->dev);
              return error;
      }
      
      int
      audioclose(dev_t dev, int flags, int ifmt, struct proc *p)
      {
              struct audio_softc *sc;
              int error;
      
              sc = (struct audio_softc *)device_lookup(&audio_cd, AUDIO_UNIT(dev));
              if (sc == NULL)
                      return ENXIO;
              switch (AUDIO_DEV(dev)) {
              case AUDIO_DEV_AUDIO:
                      error = audio_close(sc);
                      break;
              case AUDIO_DEV_MIXER:
              case AUDIO_DEV_AUDIOCTL:
                      error = 0;
                      break;
              default:
                      error = ENXIO;
              }
              device_unref(&sc->dev);
              return error;
      }
      
      int
      audioread(dev_t dev, struct uio *uio, int ioflag)
      {
              struct audio_softc *sc;
              int error;
      
              sc = (struct audio_softc *)device_lookup(&audio_cd, AUDIO_UNIT(dev));
              if (sc == NULL)
                      return ENXIO;
              switch (AUDIO_DEV(dev)) {
              case AUDIO_DEV_AUDIO:
                      error = audio_read(sc, uio, ioflag);
                      break;
              case AUDIO_DEV_AUDIOCTL:
              case AUDIO_DEV_MIXER:
                      error = ENODEV;
                      break;
              default:
                      error = ENXIO;
              }
              device_unref(&sc->dev);
              return error;
      }
      
      int
      audiowrite(dev_t dev, struct uio *uio, int ioflag)
      {
              struct audio_softc *sc;
              int error;
      
              sc = (struct audio_softc *)device_lookup(&audio_cd, AUDIO_UNIT(dev));
              if (sc == NULL)
                      return ENXIO;
              switch (AUDIO_DEV(dev)) {
              case AUDIO_DEV_AUDIO:
                      error = audio_write(sc, uio, ioflag);
                      break;
              case AUDIO_DEV_AUDIOCTL:
              case AUDIO_DEV_MIXER:
                      error = ENODEV;
                      break;
              default:
                      error = ENXIO;
              }
              device_unref(&sc->dev);
              return error;
      }
      
      int
      audioioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
      {
              struct audio_softc *sc;
              int error;
      
              sc = (struct audio_softc *)device_lookup(&audio_cd, AUDIO_UNIT(dev));
              if (sc == NULL)
                      return ENXIO;
              switch (AUDIO_DEV(dev)) {
              case AUDIO_DEV_AUDIO:
                      error = audio_ioctl(sc, cmd, addr);
                      break;
              case AUDIO_DEV_AUDIOCTL:
                      if (cmd == AUDIO_SETPAR && sc->mode != 0) {
                              error = EBUSY;
                              break;
                      }
                      if (cmd == AUDIO_START || cmd == AUDIO_STOP) {
                              error = ENXIO;
                              break;
                      }
                      error = audio_ioctl(sc, cmd, addr);
                      break;
              case AUDIO_DEV_MIXER:
                      error = audio_ioctl_mixer(sc, cmd, addr, p);
                      break;
              default:
                      error = ENXIO;
              }
              device_unref(&sc->dev);
              return error;
      }
      
      int
      audiopoll(dev_t dev, int events, struct proc *p)
      {
              struct audio_softc *sc;
              int revents;
      
              sc = (struct audio_softc *)device_lookup(&audio_cd, AUDIO_UNIT(dev));
              if (sc == NULL)
                      return POLLERR;
              switch (AUDIO_DEV(dev)) {
              case AUDIO_DEV_AUDIO:
                      revents = audio_poll(sc, events, p);
                      break;
              case AUDIO_DEV_AUDIOCTL:
              case AUDIO_DEV_MIXER:
              default:
                      revents = 0;
                      break;
              }
              device_unref(&sc->dev);
              return revents;
      }
      
      #if NWSKBD > 0
      int
      wskbd_initmute(struct audio_softc *sc, struct mixer_devinfo *vol)
      {
              struct mixer_devinfo *mi;
              int index = -1;
      
              mi = malloc(sizeof(struct mixer_devinfo), M_TEMP, M_WAITOK);
      
              for (mi->index = vol->next; mi->index != -1; mi->index = mi->next) {
                      if (sc->ops->query_devinfo(sc->arg, mi) != 0)
                              break;
                      if (strcmp(mi->label.name, AudioNmute) == 0) {
                              index = mi->index;
                              break;
                      }
              }
      
              free(mi, M_TEMP, sizeof(struct mixer_devinfo));
              return index;
      }
      
      int
      wskbd_initvol(struct audio_softc *sc, struct wskbd_vol *vol, char *cn, char *dn)
      {
              struct mixer_devinfo *dev, *cls;
      
              vol->val = vol->mute = -1;
              dev = malloc(sizeof(struct mixer_devinfo), M_TEMP, M_WAITOK);
              cls = malloc(sizeof(struct mixer_devinfo), M_TEMP, M_WAITOK);
      
              for (dev->index = 0; ; dev->index++) {
                      if (sc->ops->query_devinfo(sc->arg, dev) != 0)
                              break;
                      if (dev->type != AUDIO_MIXER_VALUE)
                              continue;
                      cls->index = dev->mixer_class;
                      if (sc->ops->query_devinfo(sc->arg, cls) != 0)
                              continue;
                      if (strcmp(cls->label.name, cn) == 0 &&
                          strcmp(dev->label.name, dn) == 0) {
                              vol->val = dev->index;
                              vol->nch = dev->un.v.num_channels;
                              vol->step = dev->un.v.delta > 8 ? dev->un.v.delta : 8;
                              vol->mute = wskbd_initmute(sc, dev);
                              vol->val_pending = vol->mute_pending = 0;
                              DPRINTF("%s: wskbd using %s.%s%s\n", DEVNAME(sc),
                                  cn, dn, vol->mute >= 0 ? ", mute control" : "");
                              break;
                      }
              }
      
              free(cls, M_TEMP, sizeof(struct mixer_devinfo));
              free(dev, M_TEMP, sizeof(struct mixer_devinfo));
              return (vol->val != -1);
      }
      
      void
      wskbd_mixer_init(struct audio_softc *sc)
      {
              static struct {
                      char *cn, *dn;
              } spkr_names[] = {
                      {AudioCoutputs, AudioNmaster},
                      {AudioCinputs,  AudioNdac},
                      {AudioCoutputs, AudioNdac},
                      {AudioCoutputs, AudioNoutput}
              }, mic_names[] = {
                      {AudioCrecord, AudioNrecord},
                      {AudioCrecord, AudioNvolume},
                      {AudioCinputs, AudioNrecord},
                      {AudioCinputs, AudioNvolume},
                      {AudioCinputs, AudioNinput}
              };
              int i;
      
              if (sc->dev.dv_unit != 0) {
                      DPRINTF("%s: not configuring wskbd keys\n", DEVNAME(sc));
                      return;
              }
              for (i = 0; i < sizeof(spkr_names) / sizeof(spkr_names[0]); i++) {
                      if (wskbd_initvol(sc, &sc->spkr,
                              spkr_names[i].cn, spkr_names[i].dn))
                              break;
              }
              for (i = 0; i < sizeof(mic_names) / sizeof(mic_names[0]); i++) {
                      if (wskbd_initvol(sc, &sc->mic,
                              mic_names[i].cn, mic_names[i].dn))
                              break;
              }
              task_set(&sc->wskbd_task, wskbd_mixer_cb, sc);
      }
      
      void
      wskbd_mixer_update(struct audio_softc *sc, struct wskbd_vol *vol)
      {
              struct mixer_ctrl ctrl;
              int val_pending, mute_pending, i, gain, error, s;
      
              s = spltty();
              val_pending = vol->val_pending;
              vol->val_pending = 0;
              mute_pending = vol->mute_pending;
              vol->mute_pending = 0;
              splx(s);
      
              if (sc->ops == NULL)
                      return;
              if (vol->mute >= 0 && mute_pending) {
                      ctrl.dev = vol->mute;
                      ctrl.type = AUDIO_MIXER_ENUM;
                      error = sc->ops->get_port(sc->arg, &ctrl);
                      if (error) {
                              DPRINTF("%s: get mute err = %d\n", DEVNAME(sc), error);
                              return;
                      }
                      switch (mute_pending) {
                      case WSKBD_MUTE_TOGGLE:
                              ctrl.un.ord = !ctrl.un.ord;
                              break;
                      case WSKBD_MUTE_DISABLE:
                              ctrl.un.ord = 0;
                              break;
                      case WSKBD_MUTE_ENABLE:
                              ctrl.un.ord = 1;
                              break;
                      }
                      DPRINTFN(1, "%s: wskbd mute setting to %d\n",
                          DEVNAME(sc), ctrl.un.ord);
                      error = sc->ops->set_port(sc->arg, &ctrl);
                      if (error) {
                              DPRINTF("%s: set mute err = %d\n", DEVNAME(sc), error);
                              return;
                      }
              }
              if (vol->val >= 0 && val_pending) {
                      ctrl.dev = vol->val;
                      ctrl.type = AUDIO_MIXER_VALUE;
                      ctrl.un.value.num_channels = vol->nch;
                      error = sc->ops->get_port(sc->arg, &ctrl);
                      if (error) {
                        DPRINTF("%s: get vol err = %d\n", DEVNAME(sc), error);
                              return;
                      }
                      for (i = 0; i < vol->nch; i++) {
                              gain = ctrl.un.value.level[i] + vol->step * val_pending;
                              if (gain > AUDIO_MAX_GAIN)
                                      gain = AUDIO_MAX_GAIN;
                              else if (gain < AUDIO_MIN_GAIN)
                                      gain = AUDIO_MIN_GAIN;
                              ctrl.un.value.level[i] = gain;
                              DPRINTFN(1, "%s: wskbd level %d set to %d\n",
                                  DEVNAME(sc), i, gain);
                      }
                      error = sc->ops->set_port(sc->arg, &ctrl);
                      if (error) {
                              DPRINTF("%s: set vol err = %d\n", DEVNAME(sc), error);
                              return;
                      }
              }
      }
      
      void
      wskbd_mixer_cb(void *arg)
      {
              struct audio_softc *sc = arg;
      
              wskbd_mixer_update(sc, &sc->spkr);
              wskbd_mixer_update(sc, &sc->mic);
              device_unref(&sc->dev);
      }
      
      int
      wskbd_set_mixermute(long mute, long out)
      {
              struct audio_softc *sc;
              struct wskbd_vol *vol;
      
              sc = (struct audio_softc *)device_lookup(&audio_cd, 0);
              if (sc == NULL)
                      return ENODEV;
              vol = out ? &sc->spkr : &sc->mic;
              vol->mute_pending = mute ? WSKBD_MUTE_ENABLE : WSKBD_MUTE_DISABLE;
              if (!task_add(systq, &sc->wskbd_task))
                      device_unref(&sc->dev);
              return 0;
      }
      
      int
      wskbd_set_mixervolume(long dir, long out)
      {
              struct audio_softc *sc;
              struct wskbd_vol *vol;
      
              sc = (struct audio_softc *)device_lookup(&audio_cd, 0);
              if (sc == NULL)
                      return ENODEV;
              vol = out ? &sc->spkr : &sc->mic;
              if (dir == 0)
                      vol->mute_pending ^= WSKBD_MUTE_TOGGLE;
              else
                      vol->val_pending += dir;
              if (!task_add(systq, &sc->wskbd_task))
                      device_unref(&sc->dev);
              return 0;
      }
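
/*
 * Illustrative caller sketch: keyboard drivers invoke the two
 * functions above from their hotkey handlers; the mixer work itself
 * runs later from wskbd_mixer_cb() on the system task queue.
 *
 *	wskbd_set_mixervolume(1, 1);	// volume-up key, output device
 *	wskbd_set_mixervolume(0, 1);	// dir == 0: toggle output mute
 *	wskbd_set_mixermute(1, 0);	// mute the input (mic) device
 */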
      #endif /* NWSKBD > 0 */
      /*        $OpenBSD: if_vio.c,v 1.13 2019/08/06 19:24:45 bluhm Exp $        */
      
      /*
       * Copyright (c) 2012 Stefan Fritsch, Alexander Fiveg.
       * Copyright (c) 2010 Minoura Makoto.
       * All rights reserved.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       *
       * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
       * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
       * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
       * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
       * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
       * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
       * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
       * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
       * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
       * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
       */
      
      #include "bpfilter.h"
      #include "vlan.h"
      
      #include <sys/param.h>
      #include <sys/systm.h>
      #include <sys/kernel.h>
      #include <sys/device.h>
      #include <sys/mbuf.h>
      #include <sys/socket.h>
      #include <sys/sockio.h>
      #include <sys/timeout.h>
      
      #include <dev/pv/virtioreg.h>
      #include <dev/pv/virtiovar.h>
      
      #include <net/if.h>
      #include <net/if_media.h>
      
      #include <netinet/in.h>
      #include <netinet/if_ether.h>
      #include <netinet/ip.h>
      #include <netinet/tcp.h>
      #include <netinet/udp.h>
      
      #if NBPFILTER > 0
      #include <net/bpf.h>
      #endif
      
      #if VIRTIO_DEBUG
      #define DPRINTF(x...) printf(x)
      #else
      #define DPRINTF(x...)
      #endif
      
      /*
       * if_vioreg.h:
       */
      /* Configuration registers */
      #define VIRTIO_NET_CONFIG_MAC                0 /* 8bit x 6byte */
      #define VIRTIO_NET_CONFIG_STATUS        6 /* 16bit */
      
      /* Feature bits */
      #define VIRTIO_NET_F_CSUM                        (1ULL<<0)
      #define VIRTIO_NET_F_GUEST_CSUM                        (1ULL<<1)
      #define VIRTIO_NET_F_CTRL_GUEST_OFFLOADS        (1ULL<<2)
      #define VIRTIO_NET_F_MTU                        (1ULL<<3)
      #define VIRTIO_NET_F_MAC                        (1ULL<<5)
      #define VIRTIO_NET_F_GSO                        (1ULL<<6)
      #define VIRTIO_NET_F_GUEST_TSO4                        (1ULL<<7)
      #define VIRTIO_NET_F_GUEST_TSO6                        (1ULL<<8)
      #define VIRTIO_NET_F_GUEST_ECN                        (1ULL<<9)
      #define VIRTIO_NET_F_GUEST_UFO                        (1ULL<<10)
      #define VIRTIO_NET_F_HOST_TSO4                        (1ULL<<11)
      #define VIRTIO_NET_F_HOST_TSO6                        (1ULL<<12)
      #define VIRTIO_NET_F_HOST_ECN                        (1ULL<<13)
      #define VIRTIO_NET_F_HOST_UFO                        (1ULL<<14)
      #define VIRTIO_NET_F_MRG_RXBUF                        (1ULL<<15)
      #define VIRTIO_NET_F_STATUS                        (1ULL<<16)
      #define VIRTIO_NET_F_CTRL_VQ                        (1ULL<<17)
      #define VIRTIO_NET_F_CTRL_RX                        (1ULL<<18)
      #define VIRTIO_NET_F_CTRL_VLAN                        (1ULL<<19)
      #define VIRTIO_NET_F_CTRL_RX_EXTRA                (1ULL<<20)
      #define VIRTIO_NET_F_GUEST_ANNOUNCE                (1ULL<<21)
      #define VIRTIO_NET_F_MQ                                (1ULL<<22)
      #define VIRTIO_NET_F_CTRL_MAC_ADDR                (1ULL<<23)
      
      /*
       * Config(8) flags. The lowest byte is reserved for generic virtio stuff.
       */
      
/* Workaround for a vlan-related bug in qemu < version 2.0 */
      #define CONFFLAG_QEMU_VLAN_BUG                (1<<8)
      
      static const struct virtio_feature_name virtio_net_feature_names[] = {
      #if VIRTIO_DEBUG
              { VIRTIO_NET_F_CSUM,                        "CSum" },
              { VIRTIO_NET_F_GUEST_CSUM,                "GuestCSum" },
              { VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,        "CtrlGuestOffl" },
        { VIRTIO_NET_F_MTU,                        "MTU" },
              { VIRTIO_NET_F_MAC,                        "MAC" },
              { VIRTIO_NET_F_GSO,                        "GSO" },
              { VIRTIO_NET_F_GUEST_TSO4,                "GuestTSO4" },
              { VIRTIO_NET_F_GUEST_TSO6,                "GuestTSO6" },
              { VIRTIO_NET_F_GUEST_ECN,                "GuestECN" },
              { VIRTIO_NET_F_GUEST_UFO,                "GuestUFO" },
              { VIRTIO_NET_F_HOST_TSO4,                "HostTSO4" },
              { VIRTIO_NET_F_HOST_TSO6,                "HostTSO6" },
              { VIRTIO_NET_F_HOST_ECN,                 "HostECN" },
              { VIRTIO_NET_F_HOST_UFO,                 "HostUFO" },
              { VIRTIO_NET_F_MRG_RXBUF,                "MrgRXBuf" },
              { VIRTIO_NET_F_STATUS,                        "Status" },
              { VIRTIO_NET_F_CTRL_VQ,                        "CtrlVQ" },
              { VIRTIO_NET_F_CTRL_RX,                        "CtrlRX" },
              { VIRTIO_NET_F_CTRL_VLAN,                "CtrlVLAN" },
              { VIRTIO_NET_F_CTRL_RX_EXTRA,                "CtrlRXExtra" },
              { VIRTIO_NET_F_GUEST_ANNOUNCE,                "GuestAnnounce" },
              { VIRTIO_NET_F_MQ,                        "MQ" },
              { VIRTIO_NET_F_CTRL_MAC_ADDR,                "CtrlMAC" },
      #endif
              { 0,                                 NULL }
      };
      
      /* Status */
      #define VIRTIO_NET_S_LINK_UP        1
      
      /* Packet header structure */
      struct virtio_net_hdr {
              uint8_t                flags;
              uint8_t                gso_type;
              uint16_t        hdr_len;
              uint16_t        gso_size;
              uint16_t        csum_start;
              uint16_t        csum_offset;
      
              /* only present if VIRTIO_NET_F_MRG_RXBUF is negotiated */
              uint16_t        num_buffers;
      } __packed;
      
      #define VIRTIO_NET_HDR_F_NEEDS_CSUM        1 /* flags */
      #define VIRTIO_NET_HDR_GSO_NONE                0 /* gso_type */
      #define VIRTIO_NET_HDR_GSO_TCPV4        1 /* gso_type */
      #define VIRTIO_NET_HDR_GSO_UDP                3 /* gso_type */
      #define VIRTIO_NET_HDR_GSO_TCPV6        4 /* gso_type */
      #define VIRTIO_NET_HDR_GSO_ECN                0x80 /* gso_type, |'ed */
      
      #define VIRTIO_NET_MAX_GSO_LEN                (65536+ETHER_HDR_LEN)
      
      /* Control virtqueue */
      struct virtio_net_ctrl_cmd {
              uint8_t        class;
              uint8_t        command;
      } __packed;
      #define VIRTIO_NET_CTRL_RX                0
      # define VIRTIO_NET_CTRL_RX_PROMISC        0
      # define VIRTIO_NET_CTRL_RX_ALLMULTI        1
      
      #define VIRTIO_NET_CTRL_MAC                1
      # define VIRTIO_NET_CTRL_MAC_TABLE_SET        0
      
      #define VIRTIO_NET_CTRL_VLAN                2
      # define VIRTIO_NET_CTRL_VLAN_ADD        0
      # define VIRTIO_NET_CTRL_VLAN_DEL        1
      
      struct virtio_net_ctrl_status {
              uint8_t        ack;
      } __packed;
      #define VIRTIO_NET_OK                        0
      #define VIRTIO_NET_ERR                        1
      
      struct virtio_net_ctrl_rx {
              uint8_t        onoff;
      } __packed;
      
      struct virtio_net_ctrl_mac_tbl {
              uint32_t nentries;
              uint8_t macs[][ETHER_ADDR_LEN];
      } __packed;
      
      struct virtio_net_ctrl_vlan {
              uint16_t id;
      } __packed;
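
/*
 * Illustrative control-queue exchange, inferred from the structures
 * above and the DMA-direction comments above vio_alloc_mem(): the
 * driver enqueues a command header plus a class-specific payload as
 * device-readable buffers, followed by a one-byte status the device
 * writes back.
 *
 *	struct virtio_net_ctrl_cmd	cmd;	(driver -> device)
 *	struct virtio_net_ctrl_rx	rx;	(driver -> device, class RX)
 *	struct virtio_net_ctrl_status	st;	(device -> driver)
 *
 * st.ack is VIRTIO_NET_OK on success and VIRTIO_NET_ERR otherwise.
 */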
      
      /*
       * if_viovar.h:
       */
      enum vio_ctrl_state {
              FREE, INUSE, DONE, RESET
      };
      
      struct vio_softc {
              struct device                sc_dev;
      
              struct virtio_softc        *sc_virtio;
      #define        VQRX        0
      #define        VQTX        1
      #define        VQCTL        2
              struct virtqueue        sc_vq[3];
      
              struct arpcom                sc_ac;
              struct ifmedia                sc_media;
      
              short                        sc_ifflags;
      
              /* bus_dmamem */
              bus_dma_segment_t        sc_dma_seg;
              bus_dmamap_t                sc_dma_map;
              size_t                        sc_dma_size;
              caddr_t                        sc_dma_kva;
      
              int                        sc_hdr_size;
              struct virtio_net_hdr        *sc_tx_hdrs;
              struct virtio_net_ctrl_cmd *sc_ctrl_cmd;
              struct virtio_net_ctrl_status *sc_ctrl_status;
              struct virtio_net_ctrl_rx *sc_ctrl_rx;
              struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_uc;
      #define sc_ctrl_mac_info sc_ctrl_mac_tbl_uc
              struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_mc;
      
              /* kmem */
              bus_dmamap_t                *sc_arrays;
      #define sc_rx_dmamaps sc_arrays
              bus_dmamap_t                *sc_tx_dmamaps;
              struct mbuf                **sc_rx_mbufs;
              struct mbuf                **sc_tx_mbufs;
              struct if_rxring        sc_rx_ring;
      
              enum vio_ctrl_state        sc_ctrl_inuse;
      
              struct timeout                sc_txtick, sc_rxtick;
      };
      
      #define VIO_DMAMEM_OFFSET(sc, p) ((caddr_t)(p) - (sc)->sc_dma_kva)
      #define VIO_DMAMEM_SYNC(vsc, sc, p, size, flags)                \
              bus_dmamap_sync((vsc)->sc_dmat, (sc)->sc_dma_map,        \
                  VIO_DMAMEM_OFFSET((sc), (p)), (size), (flags))
      #define VIO_DMAMEM_ENQUEUE(sc, vq, slot, p, size, write)        \
              virtio_enqueue_p((vq), (slot), (sc)->sc_dma_map,        \
                  VIO_DMAMEM_OFFSET((sc), (p)), (size), (write))
      #define VIO_HAVE_MRG_RXBUF(sc)                                        \
              ((sc)->sc_hdr_size == sizeof(struct virtio_net_hdr))
      
      #define VIRTIO_NET_TX_MAXNSEGS                16 /* for larger chains, defrag */
      #define VIRTIO_NET_CTRL_MAC_MC_ENTRIES        64 /* for more entries, use ALLMULTI */
      #define VIRTIO_NET_CTRL_MAC_UC_ENTRIES         1 /* one entry for own unicast addr */
      
      #define VIO_CTRL_MAC_INFO_SIZE                                         \
              (2*sizeof(struct virtio_net_ctrl_mac_tbl) +                 \
               (VIRTIO_NET_CTRL_MAC_MC_ENTRIES +                         \
                VIRTIO_NET_CTRL_MAC_UC_ENTRIES) * ETHER_ADDR_LEN)
      
      /* cfattach interface functions */
      int        vio_match(struct device *, void *, void *);
      void        vio_attach(struct device *, struct device *, void *);
      
      /* ifnet interface functions */
      int        vio_init(struct ifnet *);
      void        vio_stop(struct ifnet *, int);
      void        vio_start(struct ifnet *);
      int        vio_ioctl(struct ifnet *, u_long, caddr_t);
      void        vio_get_lladr(struct arpcom *ac, struct virtio_softc *vsc);
      void        vio_put_lladr(struct arpcom *ac, struct virtio_softc *vsc);
      
      /* rx */
      int        vio_add_rx_mbuf(struct vio_softc *, int);
      void        vio_free_rx_mbuf(struct vio_softc *, int);
      void        vio_populate_rx_mbufs(struct vio_softc *);
      int        vio_rxeof(struct vio_softc *);
      int        vio_rx_intr(struct virtqueue *);
      void        vio_rx_drain(struct vio_softc *);
      void        vio_rxtick(void *);
      
      /* tx */
      int        vio_tx_intr(struct virtqueue *);
      int        vio_txeof(struct virtqueue *);
      void        vio_tx_drain(struct vio_softc *);
      int        vio_encap(struct vio_softc *, int, struct mbuf *);
      void        vio_txtick(void *);
      
      /* other control */
      void        vio_link_state(struct ifnet *);
      int        vio_config_change(struct virtio_softc *);
      int        vio_ctrl_rx(struct vio_softc *, int, int);
      int        vio_set_rx_filter(struct vio_softc *);
      void        vio_iff(struct vio_softc *);
      int        vio_media_change(struct ifnet *);
      void        vio_media_status(struct ifnet *, struct ifmediareq *);
      int        vio_ctrleof(struct virtqueue *);
      int        vio_wait_ctrl(struct vio_softc *sc);
      int        vio_wait_ctrl_done(struct vio_softc *sc);
      void        vio_ctrl_wakeup(struct vio_softc *, enum vio_ctrl_state);
      int        vio_alloc_mem(struct vio_softc *);
      int        vio_alloc_dmamem(struct vio_softc *);
      void        vio_free_dmamem(struct vio_softc *);
      
      #if VIRTIO_DEBUG
      void        vio_dump(struct vio_softc *);
      #endif
      
      int
      vio_match(struct device *parent, void *match, void *aux)
      {
              struct virtio_softc *va = aux;
      
              if (va->sc_childdevid == PCI_PRODUCT_VIRTIO_NETWORK)
                      return 1;
      
              return 0;
      }
      
      struct cfattach vio_ca = {
              sizeof(struct vio_softc), vio_match, vio_attach, NULL
      };
      
      struct cfdriver vio_cd = {
              NULL, "vio", DV_IFNET
      };
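
/*
 * The helper below follows the canonical four-step bus_dma(9)
 * allocation sequence, unwinding completed steps in reverse order
 * on failure:
 *
 *	bus_dmamap_create -> bus_dmamem_alloc -> bus_dmamem_map ->
 *	bus_dmamap_load
 */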
      
      int
      vio_alloc_dmamem(struct vio_softc *sc)
      {
              struct virtio_softc *vsc = sc->sc_virtio;
              int nsegs;
      
              if (bus_dmamap_create(vsc->sc_dmat, sc->sc_dma_size, 1,
                  sc->sc_dma_size, 0, BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW,
                  &sc->sc_dma_map) != 0)
                      goto err;
              if (bus_dmamem_alloc(vsc->sc_dmat, sc->sc_dma_size, 16, 0,
                  &sc->sc_dma_seg, 1, &nsegs, BUS_DMA_NOWAIT | BUS_DMA_ZERO) != 0)
                      goto destroy;
              if (bus_dmamem_map(vsc->sc_dmat, &sc->sc_dma_seg, nsegs,
                  sc->sc_dma_size, &sc->sc_dma_kva, BUS_DMA_NOWAIT) != 0)
                      goto free;
              if (bus_dmamap_load(vsc->sc_dmat, sc->sc_dma_map, sc->sc_dma_kva,
                  sc->sc_dma_size, NULL, BUS_DMA_NOWAIT) != 0)
                      goto unmap;
              return (0);
      
      unmap:
              bus_dmamem_unmap(vsc->sc_dmat, sc->sc_dma_kva, sc->sc_dma_size);
      free:
              bus_dmamem_free(vsc->sc_dmat, &sc->sc_dma_seg, 1);
      destroy:
              bus_dmamap_destroy(vsc->sc_dmat, sc->sc_dma_map);
      err:
              return (1);
      }
      
      void
      vio_free_dmamem(struct vio_softc *sc)
      {
              struct virtio_softc *vsc = sc->sc_virtio;
              bus_dmamap_unload(vsc->sc_dmat, sc->sc_dma_map);
              bus_dmamem_unmap(vsc->sc_dmat, sc->sc_dma_kva, sc->sc_dma_size);
              bus_dmamem_free(vsc->sc_dmat, &sc->sc_dma_seg, 1);
              bus_dmamap_destroy(vsc->sc_dmat, sc->sc_dma_map);
      }
      
      /* allocate memory */
      /*
       * dma memory is used for:
       *   sc_tx_hdrs[slot]:         metadata array for frames to be sent (WRITE)
       *   sc_ctrl_cmd:         command to be sent via ctrl vq (WRITE)
       *   sc_ctrl_status:         return value for a command via ctrl vq (READ)
       *   sc_ctrl_rx:         parameter for a VIRTIO_NET_CTRL_RX class command
       *                         (WRITE)
       *   sc_ctrl_mac_tbl_uc: unicast MAC address filter for a VIRTIO_NET_CTRL_MAC
       *                         class command (WRITE)
       *   sc_ctrl_mac_tbl_mc: multicast MAC address filter for a VIRTIO_NET_CTRL_MAC
       *                         class command (WRITE)
 * Only one instance of each sc_ctrl_* structure is allocated; they are
 * protected by sc_ctrl_inuse, which may only be accessed at splnet.
       *
       * metadata headers for received frames are stored at the start of the
       * rx mbufs.
       */
      /*
       * dynamically allocated memory is used for:
       *   sc_rx_dmamaps[slot]:        bus_dmamap_t array for received payload
       *   sc_tx_dmamaps[slot]:        bus_dmamap_t array for sent payload
       *   sc_rx_mbufs[slot]:                mbuf pointer array for received frames
       *   sc_tx_mbufs[slot]:                mbuf pointer array for sent frames
       */
      int
      vio_alloc_mem(struct vio_softc *sc)
      {
              struct virtio_softc *vsc = sc->sc_virtio;
              struct ifnet *ifp = &sc->sc_ac.ac_if;
              int allocsize, r, i, txsize;
              unsigned int offset = 0;
              int rxqsize, txqsize;
              caddr_t kva;
      
              rxqsize = vsc->sc_vqs[0].vq_num;
              txqsize = vsc->sc_vqs[1].vq_num;
      
              /*
               * For simplicity, we always allocate the full virtio_net_hdr size
               * even if VIRTIO_NET_F_MRG_RXBUF is not negotiated and
               * only a part of the memory is ever used.
               */
              allocsize = sizeof(struct virtio_net_hdr) * txqsize;
      
              if (vsc->sc_nvqs == 3) {
                      allocsize += sizeof(struct virtio_net_ctrl_cmd) * 1;
                      allocsize += sizeof(struct virtio_net_ctrl_status) * 1;
                      allocsize += sizeof(struct virtio_net_ctrl_rx) * 1;
                      allocsize += VIO_CTRL_MAC_INFO_SIZE;
              }
              sc->sc_dma_size = allocsize;
      
              if (vio_alloc_dmamem(sc) != 0) {
                      printf("unable to allocate dma region\n");
                return -1;
              }
      
              kva = sc->sc_dma_kva;
              sc->sc_tx_hdrs = (struct virtio_net_hdr*)(kva + offset);
              offset += sizeof(struct virtio_net_hdr) * txqsize;
              if (vsc->sc_nvqs == 3) {
                      sc->sc_ctrl_cmd = (void*)(kva + offset);
                      offset += sizeof(*sc->sc_ctrl_cmd);
                      sc->sc_ctrl_status = (void*)(kva + offset);
                      offset += sizeof(*sc->sc_ctrl_status);
                      sc->sc_ctrl_rx = (void*)(kva + offset);
                      offset += sizeof(*sc->sc_ctrl_rx);
                      sc->sc_ctrl_mac_tbl_uc = (void*)(kva + offset);
                      offset += sizeof(*sc->sc_ctrl_mac_tbl_uc) +
                          ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_UC_ENTRIES;
                      sc->sc_ctrl_mac_tbl_mc = (void*)(kva + offset);
              }
      
              sc->sc_arrays = mallocarray(rxqsize + txqsize,
                  2 * sizeof(bus_dmamap_t) + sizeof(struct mbuf *), M_DEVBUF,
                  M_WAITOK | M_CANFAIL | M_ZERO);
              if (sc->sc_arrays == NULL) {
                      printf("unable to allocate mem for dmamaps\n");
                      goto err_hdr;
              }
              allocsize = (rxqsize + txqsize) *
                  (2 * sizeof(bus_dmamap_t) + sizeof(struct mbuf *));
      
              sc->sc_tx_dmamaps = sc->sc_arrays + rxqsize;
              sc->sc_rx_mbufs = (void*) (sc->sc_tx_dmamaps + txqsize);
              sc->sc_tx_mbufs = sc->sc_rx_mbufs + rxqsize;
      
              for (i = 0; i < rxqsize; i++) {
                      r = bus_dmamap_create(vsc->sc_dmat, MCLBYTES, 1, MCLBYTES, 0,
                          BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW, &sc->sc_rx_dmamaps[i]);
                      if (r != 0)
                              goto err_reqs;
              }
      
              txsize = ifp->if_hardmtu + sc->sc_hdr_size + ETHER_HDR_LEN;
              for (i = 0; i < txqsize; i++) {
                      r = bus_dmamap_create(vsc->sc_dmat, txsize,
                          VIRTIO_NET_TX_MAXNSEGS, txsize, 0,
                          BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW,
                          &sc->sc_tx_dmamaps[i]);
                      if (r != 0)
                              goto err_reqs;
              }
      
              return 0;
      
      err_reqs:
              printf("dmamap creation failed, error %d\n", r);
              for (i = 0; i < txqsize; i++) {
                      if (sc->sc_tx_dmamaps[i])
                              bus_dmamap_destroy(vsc->sc_dmat, sc->sc_tx_dmamaps[i]);
              }
              for (i = 0; i < rxqsize; i++) {
                      if (sc->sc_rx_dmamaps[i])
                              bus_dmamap_destroy(vsc->sc_dmat, sc->sc_rx_dmamaps[i]);
              }
              if (sc->sc_arrays) {
                free(sc->sc_arrays, M_DEVBUF, allocsize);
                      sc->sc_arrays = 0;
              }
      err_hdr:
              vio_free_dmamem(sc);
              return -1;
      }
      
      void
      vio_get_lladr(struct arpcom *ac, struct virtio_softc *vsc)
      {
              int i;
              for (i = 0; i < ETHER_ADDR_LEN; i++) {
                      ac->ac_enaddr[i] = virtio_read_device_config_1(vsc,
                          VIRTIO_NET_CONFIG_MAC + i);
              }
      }
      
      void
      vio_put_lladr(struct arpcom *ac, struct virtio_softc *vsc)
      {
              int i;
              for (i = 0; i < ETHER_ADDR_LEN; i++) {
                      virtio_write_device_config_1(vsc, VIRTIO_NET_CONFIG_MAC + i,
                           ac->ac_enaddr[i]);
              }
      }
      
      void
      vio_attach(struct device *parent, struct device *self, void *aux)
      {
              struct vio_softc *sc = (struct vio_softc *)self;
              struct virtio_softc *vsc = (struct virtio_softc *)parent;
              int i;
              struct ifnet *ifp = &sc->sc_ac.ac_if;
      
              if (vsc->sc_child != NULL) {
                      printf(": child already attached for %s; something wrong...\n",
                             parent->dv_xname);
                      return;
              }
      
              sc->sc_virtio = vsc;
      
              vsc->sc_child = self;
              vsc->sc_ipl = IPL_NET;
              vsc->sc_vqs = &sc->sc_vq[0];
              vsc->sc_config_change = 0;
              vsc->sc_driver_features = VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS |
                  VIRTIO_NET_F_CTRL_VQ | VIRTIO_NET_F_CTRL_RX |
                  VIRTIO_NET_F_MRG_RXBUF | VIRTIO_NET_F_CSUM |
                  VIRTIO_F_RING_EVENT_IDX;
      
              virtio_negotiate_features(vsc, virtio_net_feature_names);
              if (virtio_has_feature(vsc, VIRTIO_NET_F_MAC)) {
                      vio_get_lladr(&sc->sc_ac, vsc);
              } else {
                      ether_fakeaddr(ifp);
                      vio_put_lladr(&sc->sc_ac, vsc);
              }
              printf(": address %s\n", ether_sprintf(sc->sc_ac.ac_enaddr));
      
              if (virtio_has_feature(vsc, VIRTIO_NET_F_MRG_RXBUF) ||
                  vsc->sc_version_1) {
                      sc->sc_hdr_size = sizeof(struct virtio_net_hdr);
              } else {
                      sc->sc_hdr_size = offsetof(struct virtio_net_hdr, num_buffers);
              }
              if (virtio_has_feature(vsc, VIRTIO_NET_F_MRG_RXBUF))
                      ifp->if_hardmtu = 16000; /* arbitrary limit */
              else
                      ifp->if_hardmtu = MCLBYTES - sc->sc_hdr_size - ETHER_HDR_LEN;
      
              if (virtio_alloc_vq(vsc, &sc->sc_vq[VQRX], 0, MCLBYTES, 2, "rx") != 0)
                      goto err;
              vsc->sc_nvqs = 1;
              sc->sc_vq[VQRX].vq_done = vio_rx_intr;
              if (virtio_alloc_vq(vsc, &sc->sc_vq[VQTX], 1,
                  sc->sc_hdr_size + ifp->if_hardmtu + ETHER_HDR_LEN,
                  VIRTIO_NET_TX_MAXNSEGS + 1, "tx") != 0) {
                      goto err;
              }
              vsc->sc_nvqs = 2;
              sc->sc_vq[VQTX].vq_done = vio_tx_intr;
              virtio_start_vq_intr(vsc, &sc->sc_vq[VQRX]);
              if (virtio_has_feature(vsc, VIRTIO_F_RING_EVENT_IDX))
                      virtio_postpone_intr_far(&sc->sc_vq[VQTX]);
              else
                      virtio_stop_vq_intr(vsc, &sc->sc_vq[VQTX]);
              if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ)
                  && virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_RX)) {
                      if (virtio_alloc_vq(vsc, &sc->sc_vq[VQCTL], 2, NBPG, 1,
                          "control") == 0) {
                              sc->sc_vq[VQCTL].vq_done = vio_ctrleof;
                              virtio_start_vq_intr(vsc, &sc->sc_vq[VQCTL]);
                              vsc->sc_nvqs = 3;
                      }
              }
      
              if (vio_alloc_mem(sc) < 0)
                      goto err;
      
              strlcpy(ifp->if_xname, self->dv_xname, IFNAMSIZ);
              ifp->if_softc = sc;
              ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
              ifp->if_start = vio_start;
              ifp->if_ioctl = vio_ioctl;
              ifp->if_capabilities = IFCAP_VLAN_MTU;
              if (virtio_has_feature(vsc, VIRTIO_NET_F_CSUM))
                      ifp->if_capabilities |= IFCAP_CSUM_TCPv4|IFCAP_CSUM_UDPv4;
              IFQ_SET_MAXLEN(&ifp->if_snd, vsc->sc_vqs[1].vq_num - 1);
              ifmedia_init(&sc->sc_media, 0, vio_media_change, vio_media_status);
              ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
              ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
              vsc->sc_config_change = vio_config_change;
              timeout_set(&sc->sc_txtick, vio_txtick, &sc->sc_vq[VQTX]);
              timeout_set(&sc->sc_rxtick, vio_rxtick, &sc->sc_vq[VQRX]);
      
              if_attach(ifp);
              ether_ifattach(ifp);
      
              return;
      
      err:
              for (i = 0; i < vsc->sc_nvqs; i++)
                      virtio_free_vq(vsc, &sc->sc_vq[i]);
              vsc->sc_nvqs = 0;
              vsc->sc_child = VIRTIO_CHILD_ERROR;
              return;
      }
      
      /* check link status */
      void
      vio_link_state(struct ifnet *ifp)
      {
              struct vio_softc *sc = ifp->if_softc;
              struct virtio_softc *vsc = sc->sc_virtio;
              int link_state = LINK_STATE_FULL_DUPLEX;
      
              if (virtio_has_feature(vsc, VIRTIO_NET_F_STATUS)) {
                      int status = virtio_read_device_config_2(vsc,
                          VIRTIO_NET_CONFIG_STATUS);
                      if (!(status & VIRTIO_NET_S_LINK_UP))
                              link_state = LINK_STATE_DOWN;
              }
              if (ifp->if_link_state != link_state) {
                      ifp->if_link_state = link_state;
                      if_link_state_change(ifp);
              }
      }
      
      int
      vio_config_change(struct virtio_softc *vsc)
      {
              struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
              vio_link_state(&sc->sc_ac.ac_if);
              return 1;
      }
      
      int
      vio_media_change(struct ifnet *ifp)
      {
              /* Ignore */
              return (0);
      }
      
      void
      vio_media_status(struct ifnet *ifp, struct ifmediareq *imr)
      {
              imr->ifm_active = IFM_ETHER | IFM_AUTO;
              imr->ifm_status = IFM_AVALID;
      
              vio_link_state(ifp);
              if (LINK_STATE_IS_UP(ifp->if_link_state) && ifp->if_flags & IFF_UP)
                      imr->ifm_status |= IFM_ACTIVE|IFM_FDX;
      }
      
      /*
       * Interface functions for ifnet
       */
      int
      vio_init(struct ifnet *ifp)
      {
              struct vio_softc *sc = ifp->if_softc;
      
              vio_stop(ifp, 0);
              if_rxr_init(&sc->sc_rx_ring, 2 * ((ifp->if_hardmtu / MCLBYTES) + 1),
                  sc->sc_vq[VQRX].vq_num);
              vio_populate_rx_mbufs(sc);
              ifp->if_flags |= IFF_RUNNING;
              ifq_clr_oactive(&ifp->if_snd);
              vio_iff(sc);
              vio_link_state(ifp);
              return 0;
      }
      
      void
      vio_stop(struct ifnet *ifp, int disable)
      {
              struct vio_softc *sc = ifp->if_softc;
              struct virtio_softc *vsc = sc->sc_virtio;
      
              timeout_del(&sc->sc_txtick);
              timeout_del(&sc->sc_rxtick);
              ifp->if_flags &= ~IFF_RUNNING;
              ifq_clr_oactive(&ifp->if_snd);
              /* only way to stop I/O and DMA is resetting... */
              virtio_reset(vsc);
              vio_rxeof(sc);
              if (vsc->sc_nvqs >= 3)
                      vio_ctrleof(&sc->sc_vq[VQCTL]);
              vio_tx_drain(sc);
              if (disable)
                      vio_rx_drain(sc);
      
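        /* bring the rings back up so the interface can be restarted */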
              virtio_reinit_start(vsc);
              virtio_start_vq_intr(vsc, &sc->sc_vq[VQRX]);
              virtio_stop_vq_intr(vsc, &sc->sc_vq[VQTX]);
              if (vsc->sc_nvqs >= 3)
                      virtio_start_vq_intr(vsc, &sc->sc_vq[VQCTL]);
              virtio_reinit_end(vsc);
              if (vsc->sc_nvqs >= 3) {
                      if (sc->sc_ctrl_inuse != FREE)
                              sc->sc_ctrl_inuse = RESET;
                      wakeup(&sc->sc_ctrl_inuse);
              }
      }
      
      void
      vio_start(struct ifnet *ifp)
      {
              struct vio_softc *sc = ifp->if_softc;
              struct virtio_softc *vsc = sc->sc_virtio;
              struct virtqueue *vq = &sc->sc_vq[VQTX];
              struct mbuf *m;
              int queued = 0;
      
              vio_txeof(vq);
      
              if (!(ifp->if_flags & IFF_RUNNING) || ifq_is_oactive(&ifp->if_snd))
                      return;
              if (IFQ_IS_EMPTY(&ifp->if_snd))
                      return;
      
      again:
              for (;;) {
                      int slot, r;
                      struct virtio_net_hdr *hdr;
      
                      m = ifq_deq_begin(&ifp->if_snd);
                      if (m == NULL)
                              break;
      
                      r = virtio_enqueue_prep(vq, &slot);
                      if (r == EAGAIN) {
                              ifq_deq_rollback(&ifp->if_snd, m);
                              ifq_set_oactive(&ifp->if_snd);
                              break;
                      }
                      if (r != 0)
                              panic("enqueue_prep for a tx buffer: %d", r);
      
                      hdr = &sc->sc_tx_hdrs[slot];
                      memset(hdr, 0, sc->sc_hdr_size);
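                /*
                 * For checksum offload, tell the host where to insert the
                 * checksum: csum_start is the offset of the L4 header and
                 * csum_offset the position of the checksum field within it.
                 */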
                      if (m->m_pkthdr.csum_flags & (M_TCP_CSUM_OUT|M_UDP_CSUM_OUT)) {
                              struct mbuf *mip;
                              struct ip *ip;
                              int ehdrlen = ETHER_HDR_LEN;
                              int ipoff;
      #if NVLAN > 0
                              struct ether_vlan_header *eh;
      
                              eh = mtod(m, struct ether_vlan_header *);
                              if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
                                      ehdrlen += ETHER_VLAN_ENCAP_LEN;
      #endif
      
                              if (m->m_pkthdr.csum_flags & M_TCP_CSUM_OUT)
                                      hdr->csum_offset = offsetof(struct tcphdr, th_sum);
                              else
                                      hdr->csum_offset = offsetof(struct udphdr, uh_sum);
      
                              mip = m_getptr(m, ehdrlen, &ipoff);
                              KASSERT(mip != NULL && mip->m_len - ipoff >= sizeof(*ip));
                              ip = (struct ip *)(mip->m_data + ipoff);
                              hdr->csum_start = ehdrlen + (ip->ip_hl << 2);
                              hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
                      }
      
                      r = vio_encap(sc, slot, m);
                      if (r != 0) {
                              virtio_enqueue_abort(vq, slot);
                              ifq_deq_commit(&ifp->if_snd, m);
                              m_freem(m);
                              ifp->if_oerrors++;
                              continue;
                      }
                      r = virtio_enqueue_reserve(vq, slot,
                          sc->sc_tx_dmamaps[slot]->dm_nsegs + 1);
                      if (r != 0) {
                              bus_dmamap_unload(vsc->sc_dmat,
                                  sc->sc_tx_dmamaps[slot]);
                              ifq_deq_rollback(&ifp->if_snd, m);
                              sc->sc_tx_mbufs[slot] = NULL;
                              ifq_set_oactive(&ifp->if_snd);
                              break;
                      }
                      ifq_deq_commit(&ifp->if_snd, m);
      
                      bus_dmamap_sync(vsc->sc_dmat, sc->sc_tx_dmamaps[slot], 0,
                          sc->sc_tx_dmamaps[slot]->dm_mapsize, BUS_DMASYNC_PREWRITE);
                      VIO_DMAMEM_SYNC(vsc, sc, hdr, sc->sc_hdr_size,
                          BUS_DMASYNC_PREWRITE);
                      VIO_DMAMEM_ENQUEUE(sc, vq, slot, hdr, sc->sc_hdr_size, 1);
                      virtio_enqueue(vq, slot, sc->sc_tx_dmamaps[slot], 1);
                      virtio_enqueue_commit(vsc, vq, slot, 0);
                      queued++;
      #if NBPFILTER > 0
                      if (ifp->if_bpf)
                              bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
      #endif
              }
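        /*
         * If the ring filled up, re-enable (or, with event idx, postpone)
         * the tx interrupt and re-check for completed buffers to avoid
         * racing with the host.
         */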
              if (ifq_is_oactive(&ifp->if_snd)) {
                      int r;
                      if (virtio_has_feature(vsc, VIRTIO_F_RING_EVENT_IDX))
                              r = virtio_postpone_intr_smart(&sc->sc_vq[VQTX]);
                      else
                              r = virtio_start_vq_intr(vsc, &sc->sc_vq[VQTX]);
                      if (r) {
                              vio_txeof(vq);
                              goto again;
                      }
              }
      
              if (queued > 0) {
                      virtio_notify(vsc, vq);
                      timeout_add_sec(&sc->sc_txtick, 1);
              }
      }
      
      #if VIRTIO_DEBUG
      void
      vio_dump(struct vio_softc *sc)
      {
              struct ifnet *ifp = &sc->sc_ac.ac_if;
              struct virtio_softc *vsc = sc->sc_virtio;
      
              printf("%s status dump:\n", ifp->if_xname);
              printf("TX virtqueue:\n");
              virtio_vq_dump(&vsc->sc_vqs[VQTX]);
              printf("tx tick active: %d\n", !timeout_triggered(&sc->sc_txtick));
              printf("rx tick active: %d\n", !timeout_triggered(&sc->sc_rxtick));
              printf("RX virtqueue:\n");
              virtio_vq_dump(&vsc->sc_vqs[VQRX]);
              if (vsc->sc_nvqs == 3) {
                      printf("CTL virtqueue:\n");
                      virtio_vq_dump(&vsc->sc_vqs[VQCTL]);
                      printf("ctrl_inuse: %d\n", sc->sc_ctrl_inuse);
              }
      }
      #endif
      
      int
      vio_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
              struct vio_softc *sc = ifp->if_softc;
              struct ifreq *ifr = (struct ifreq *)data;
              int s, r = 0;
      
              s = splnet();
              switch (cmd) {
              case SIOCSIFADDR:
                      ifp->if_flags |= IFF_UP;
                      if (!(ifp->if_flags & IFF_RUNNING))
                              vio_init(ifp);
                      break;
              case SIOCSIFFLAGS:
                      if (ifp->if_flags & IFF_UP) {
      #if VIRTIO_DEBUG
                              if (ifp->if_flags & IFF_DEBUG)
                                      vio_dump(sc);
      #endif
                              if (ifp->if_flags & IFF_RUNNING)
                                      r = ENETRESET;
                              else
                                      vio_init(ifp);
                      } else {
                              if (ifp->if_flags & IFF_RUNNING)
                                      vio_stop(ifp, 1);
                      }
                      break;
              case SIOCGIFMEDIA:
              case SIOCSIFMEDIA:
                      r = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
                      break;
              case SIOCGIFRXR:
                      r = if_rxr_ioctl((struct if_rxrinfo *)ifr->ifr_data,
                          NULL, MCLBYTES, &sc->sc_rx_ring);
                      break;
              default:
                r = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
              }
      
        if (r == ENETRESET) {
                if (ifp->if_flags & IFF_RUNNING)
                        vio_iff(sc);
                      r = 0;
              }
              splx(s);
              return r;
      }
      
      /*
 * Receive implementation
       */
      /* allocate and initialize a mbuf for receive */
      int
      vio_add_rx_mbuf(struct vio_softc *sc, int i)
      {
              struct mbuf *m;
              int r;
      
              m = MCLGETI(NULL, M_DONTWAIT, NULL, MCLBYTES);
              if (m == NULL)
                      return ENOBUFS;
              sc->sc_rx_mbufs[i] = m;
              m->m_len = m->m_pkthdr.len = m->m_ext.ext_size;
              r = bus_dmamap_load_mbuf(sc->sc_virtio->sc_dmat, sc->sc_rx_dmamaps[i],
                  m, BUS_DMA_READ|BUS_DMA_NOWAIT);
              if (r) {
                      m_freem(m);
                sc->sc_rx_mbufs[i] = NULL;
                      return r;
              }
      
              return 0;
      }
      
      /* free a mbuf for receive */
      void
      vio_free_rx_mbuf(struct vio_softc *sc, int i)
      {
              bus_dmamap_unload(sc->sc_virtio->sc_dmat, sc->sc_rx_dmamaps[i]);
              m_freem(sc->sc_rx_mbufs[i]);
              sc->sc_rx_mbufs[i] = NULL;
      }
      
      /* add mbufs for all the empty receive slots */
      void
      vio_populate_rx_mbufs(struct vio_softc *sc)
      {
              struct virtio_softc *vsc = sc->sc_virtio;
              int r, done = 0;
              u_int slots;
              struct virtqueue *vq = &sc->sc_vq[VQRX];
              int mrg_rxbuf = VIO_HAVE_MRG_RXBUF(sc);
      
              for (slots = if_rxr_get(&sc->sc_rx_ring, vq->vq_num);
                  slots > 0; slots--) {
                      int slot;
                      r = virtio_enqueue_prep(vq, &slot);
                      if (r == EAGAIN)
                              break;
                      if (r != 0)
                              panic("enqueue_prep for rx buffers: %d", r);
                      if (sc->sc_rx_mbufs[slot] == NULL) {
                              r = vio_add_rx_mbuf(sc, slot);
                              if (r != 0) {
                                      virtio_enqueue_abort(vq, slot);
                                      break;
                              }
                      }
                      r = virtio_enqueue_reserve(vq, slot,
                          sc->sc_rx_dmamaps[slot]->dm_nsegs + (mrg_rxbuf ? 0 : 1));
                      if (r != 0) {
                              vio_free_rx_mbuf(sc, slot);
                              break;
                      }
                      bus_dmamap_sync(vsc->sc_dmat, sc->sc_rx_dmamaps[slot], 0,
                          MCLBYTES, BUS_DMASYNC_PREREAD);
                      if (mrg_rxbuf) {
                              virtio_enqueue(vq, slot, sc->sc_rx_dmamaps[slot], 0);
                      } else {
                              /*
                               * Buggy kvm wants a buffer of exactly the size of
                               * the header in this case, so we have to split in
                               * two.
                               */
                              virtio_enqueue_p(vq, slot, sc->sc_rx_dmamaps[slot],
                                  0, sc->sc_hdr_size, 0);
                              virtio_enqueue_p(vq, slot, sc->sc_rx_dmamaps[slot],
                                  sc->sc_hdr_size, MCLBYTES - sc->sc_hdr_size, 0);
                      }
                      virtio_enqueue_commit(vsc, vq, slot, 0);
                      done = 1;
              }
              if_rxr_put(&sc->sc_rx_ring, slots);
      
              if (done)
                      virtio_notify(vsc, vq);
              timeout_add_sec(&sc->sc_rxtick, 1);
      }
      
      /* dequeue received packets */
      int
      vio_rxeof(struct vio_softc *sc)
      {
              struct virtio_softc *vsc = sc->sc_virtio;
              struct virtqueue *vq = &sc->sc_vq[VQRX];
              struct ifnet *ifp = &sc->sc_ac.ac_if;
              struct mbuf_list ml = MBUF_LIST_INITIALIZER();
              struct mbuf *m, *m0 = NULL, *mlast;
              int r = 0;
              int slot, len, bufs_left;
              struct virtio_net_hdr *hdr;
      
              while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
                      r = 1;
                      bus_dmamap_sync(vsc->sc_dmat, sc->sc_rx_dmamaps[slot], 0,
                          MCLBYTES, BUS_DMASYNC_POSTREAD);
                      m = sc->sc_rx_mbufs[slot];
                      KASSERT(m != NULL);
                      bus_dmamap_unload(vsc->sc_dmat, sc->sc_rx_dmamaps[slot]);
                      sc->sc_rx_mbufs[slot] = NULL;
                      virtio_dequeue_commit(vq, slot);
                      if_rxr_put(&sc->sc_rx_ring, 1);
                      m->m_len = m->m_pkthdr.len = len;
                      m->m_pkthdr.csum_flags = 0;
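                /*
                 * The first buffer of a packet starts with the
                 * virtio_net_hdr; with mergeable rx buffers it also
                 * says how many buffers make up the packet.
                 */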
                      if (m0 == NULL) {
                              hdr = mtod(m, struct virtio_net_hdr *);
                              m_adj(m, sc->sc_hdr_size);
                              m0 = mlast = m;
                              if (VIO_HAVE_MRG_RXBUF(sc))
                                      bufs_left = hdr->num_buffers - 1;
                              else
                                      bufs_left = 0;
                      }
                      else {
                              m->m_flags &= ~M_PKTHDR;
                              m0->m_pkthdr.len += m->m_len;
                              mlast->m_next = m;
                              mlast = m;
                              bufs_left--;
                      }
      
                      if (bufs_left == 0) {
                              ml_enqueue(&ml, m0);
                              m0 = NULL;
                      }
              }
              if (m0 != NULL) {
                      DPRINTF("%s: expected %d buffers, got %d\n", __func__,
                          (int)hdr->num_buffers,
                          (int)hdr->num_buffers - bufs_left);
                      ifp->if_ierrors++;
                      m_freem(m0);
              }
      
              if_input(ifp, &ml);
              return r;
      }
      
      int
      vio_rx_intr(struct virtqueue *vq)
      {
              struct virtio_softc *vsc = vq->vq_owner;
              struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
              int r, sum = 0;
      
      again:
              r = vio_rxeof(sc);
              sum += r;
              if (r) {
                      vio_populate_rx_mbufs(sc);
                      /* set used event index to the next slot */
                      if (virtio_has_feature(vsc, VIRTIO_F_RING_EVENT_IDX)) {
                              if (virtio_start_vq_intr(vq->vq_owner, vq))
                                      goto again;
                      }
              }
      
              return sum;
      }
      
      void
      vio_rxtick(void *arg)
      {
              struct virtqueue *vq = arg;
              struct virtio_softc *vsc = vq->vq_owner;
              struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
              int s;
      
              s = splnet();
              vio_populate_rx_mbufs(sc);
              splx(s);
      }
      
      /* free all the mbufs; called from if_stop(disable) */
      void
      vio_rx_drain(struct vio_softc *sc)
      {
              struct virtqueue *vq = &sc->sc_vq[VQRX];
              int i;
      
              for (i = 0; i < vq->vq_num; i++) {
                      if (sc->sc_rx_mbufs[i] == NULL)
                              continue;
                      vio_free_rx_mbuf(sc, i);
              }
      }
      
      /*
 * Transmission implementation
       */
      /* actual transmission is done in if_start */
      /* tx interrupt; dequeue and free mbufs */
      /*
 * tx interrupt is actually disabled unless the tx queue is full, i.e.
 * the send queue has been marked oactive.  vio_txtick is used to make
 * sure that mbufs are dequeued and freed even if no further transfer
 * happens.
       */
      int
      vio_tx_intr(struct virtqueue *vq)
      {
              struct virtio_softc *vsc = vq->vq_owner;
              struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
              struct ifnet *ifp = &sc->sc_ac.ac_if;
              int r;
      
              r = vio_txeof(vq);
              vio_start(ifp);
              return r;
      }
      
      void
      vio_txtick(void *arg)
      {
              struct virtqueue *vq = arg;
              int s = splnet();
              vio_tx_intr(vq);
              splx(s);
      }
      
      int
      vio_txeof(struct virtqueue *vq)
      {
              struct virtio_softc *vsc = vq->vq_owner;
              struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
              struct ifnet *ifp = &sc->sc_ac.ac_if;
              struct mbuf *m;
              int r = 0;
              int slot, len;
      
              while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
                      struct virtio_net_hdr *hdr = &sc->sc_tx_hdrs[slot];
                      r++;
                      VIO_DMAMEM_SYNC(vsc, sc, hdr, sc->sc_hdr_size,
                          BUS_DMASYNC_POSTWRITE);
                      bus_dmamap_sync(vsc->sc_dmat, sc->sc_tx_dmamaps[slot], 0,
                          sc->sc_tx_dmamaps[slot]->dm_mapsize,
                          BUS_DMASYNC_POSTWRITE);
                      m = sc->sc_tx_mbufs[slot];
                      bus_dmamap_unload(vsc->sc_dmat, sc->sc_tx_dmamaps[slot]);
                sc->sc_tx_mbufs[slot] = NULL;
                      virtio_dequeue_commit(vq, slot);
                      m_freem(m);
              }
      
              if (r) {
                      ifq_clr_oactive(&ifp->if_snd);
                      virtio_stop_vq_intr(vsc, &sc->sc_vq[VQTX]);
              }
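        /* the tx tick is only needed while buffers remain outstanding */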
              if (vq->vq_used_idx == vq->vq_avail_idx)
                      timeout_del(&sc->sc_txtick);
              else if (r)
                      timeout_add_sec(&sc->sc_txtick, 1);
              return r;
      }
      
      int
      vio_encap(struct vio_softc *sc, int slot, struct mbuf *m)
      {
              struct virtio_softc        *vsc = sc->sc_virtio;
        bus_dmamap_t                 dmap = sc->sc_tx_dmamaps[slot];
              int                         r;
      
              r = bus_dmamap_load_mbuf(vsc->sc_dmat, dmap, m,
                  BUS_DMA_WRITE|BUS_DMA_NOWAIT);
              switch (r) {
              case 0:
                      break;
              case EFBIG:
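                /*
                 * Too many segments for the dmamap: compact the mbuf
                 * chain into a single cluster and retry the load.
                 */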
                      if (m_defrag(m, M_DONTWAIT) == 0 &&
                          bus_dmamap_load_mbuf(vsc->sc_dmat, dmap, m,
                          BUS_DMA_WRITE|BUS_DMA_NOWAIT) == 0)
                              break;
      
                      /* FALLTHROUGH */
              default:
                      return ENOBUFS;
              }
              sc->sc_tx_mbufs[slot] = m;
              return 0;
      }
      
      /* free all the mbufs already put on vq; called from if_stop(disable) */
      void
      vio_tx_drain(struct vio_softc *sc)
      {
              struct virtio_softc *vsc = sc->sc_virtio;
              struct virtqueue *vq = &sc->sc_vq[VQTX];
              int i;
      
              for (i = 0; i < vq->vq_num; i++) {
                      if (sc->sc_tx_mbufs[i] == NULL)
                              continue;
                      bus_dmamap_unload(vsc->sc_dmat, sc->sc_tx_dmamaps[i]);
                      m_freem(sc->sc_tx_mbufs[i]);
                      sc->sc_tx_mbufs[i] = NULL;
              }
      }
      
      /*
       * Control vq
       */
      /* issue a VIRTIO_NET_CTRL_RX class command and wait for completion */
      int
      vio_ctrl_rx(struct vio_softc *sc, int cmd, int onoff)
      {
              struct virtio_softc *vsc = sc->sc_virtio;
              struct virtqueue *vq = &sc->sc_vq[VQCTL];
              int r, slot;
      
              splassert(IPL_NET);
      
              if ((r = vio_wait_ctrl(sc)) != 0)
                      return r;
      
              sc->sc_ctrl_cmd->class = VIRTIO_NET_CTRL_RX;
              sc->sc_ctrl_cmd->command = cmd;
              sc->sc_ctrl_rx->onoff = onoff;
      
              VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd,
                  sizeof(*sc->sc_ctrl_cmd), BUS_DMASYNC_PREWRITE);
              VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_rx,
                  sizeof(*sc->sc_ctrl_rx), BUS_DMASYNC_PREWRITE);
              VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status,
                  sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_PREREAD);
      
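        /*
         * Enqueue the command, its argument and the status buffer as
         * one descriptor chain and wait for the host to process it.
         */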
              r = virtio_enqueue_prep(vq, &slot);
              if (r != 0)
                      panic("%s: control vq busy!?", sc->sc_dev.dv_xname);
              r = virtio_enqueue_reserve(vq, slot, 3);
              if (r != 0)
                      panic("%s: control vq busy!?", sc->sc_dev.dv_xname);
              VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_cmd,
                  sizeof(*sc->sc_ctrl_cmd), 1);
              VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_rx,
                  sizeof(*sc->sc_ctrl_rx), 1);
              VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_status,
                  sizeof(*sc->sc_ctrl_status), 0);
              virtio_enqueue_commit(vsc, vq, slot, 1);
      
              if ((r = vio_wait_ctrl_done(sc)) != 0)
                      goto out;
      
              VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd,
                  sizeof(*sc->sc_ctrl_cmd), BUS_DMASYNC_POSTWRITE);
              VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_rx,
                  sizeof(*sc->sc_ctrl_rx), BUS_DMASYNC_POSTWRITE);
              VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status,
                  sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_POSTREAD);
      
              if (sc->sc_ctrl_status->ack == VIRTIO_NET_OK) {
                      r = 0;
              } else {
                      printf("%s: ctrl cmd %d failed\n", sc->sc_dev.dv_xname, cmd);
                      r = EIO;
              }
      
              DPRINTF("%s: cmd %d %d: %d\n", __func__, cmd, (int)onoff, r);
      out:
              vio_ctrl_wakeup(sc, FREE);
              return r;
      }
      
      /*
       * XXXSMP As long as some per-ifp ioctl(2)s are executed with the
       * NET_LOCK() deadlocks are possible.  So release it here.
       */
      static inline int
      vio_sleep(struct vio_softc *sc, const char *wmesg)
      {
              int status = rw_status(&netlock);
      
              if (status != RW_WRITE && status != RW_READ)
                      return tsleep(&sc->sc_ctrl_inuse, PRIBIO|PCATCH, wmesg, 0);
      
              return rwsleep(&sc->sc_ctrl_inuse, &netlock, PRIBIO|PCATCH, wmesg, 0);
      }
      
      int
      vio_wait_ctrl(struct vio_softc *sc)
      {
              int r = 0;
      
              while (sc->sc_ctrl_inuse != FREE) {
                      r = vio_sleep(sc, "viowait");
                      if (r == EINTR)
                              return r;
              }
              sc->sc_ctrl_inuse = INUSE;
      
              return r;
      }
      
      int
      vio_wait_ctrl_done(struct vio_softc *sc)
      {
              int r = 0;
      
        while (sc->sc_ctrl_inuse != DONE && sc->sc_ctrl_inuse != RESET) {
                r = vio_sleep(sc, "viodone");
                if (r == EINTR)
                        break;
        }
        /* a device reset aborts the command and must be reported */
        if (sc->sc_ctrl_inuse == RESET)
                r = 1;
              return r;
      }
      
      void
      vio_ctrl_wakeup(struct vio_softc *sc, enum vio_ctrl_state new)
      {
              sc->sc_ctrl_inuse = new;
              wakeup(&sc->sc_ctrl_inuse);
      }
      
      int
      vio_ctrleof(struct virtqueue *vq)
      {
              struct virtio_softc *vsc = vq->vq_owner;
              struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
              int r = 0, ret, slot;
      
      again:
              ret = virtio_dequeue(vsc, vq, &slot, NULL);
              if (ret == ENOENT)
                      return r;
              virtio_dequeue_commit(vq, slot);
              r++;
              vio_ctrl_wakeup(sc, DONE);
              if (virtio_start_vq_intr(vsc, vq))
                      goto again;
      
              return r;
      }
      
      /* issue VIRTIO_NET_CTRL_MAC_TABLE_SET command and wait for completion */
      int
      vio_set_rx_filter(struct vio_softc *sc)
      {
              /* filter already set in sc_ctrl_mac_tbl */
              struct virtio_softc *vsc = sc->sc_virtio;
              struct virtqueue *vq = &sc->sc_vq[VQCTL];
              int r, slot;
      
              splassert(IPL_NET);
      
              if ((r = vio_wait_ctrl(sc)) != 0)
                      return r;
      
              sc->sc_ctrl_cmd->class = VIRTIO_NET_CTRL_MAC;
              sc->sc_ctrl_cmd->command = VIRTIO_NET_CTRL_MAC_TABLE_SET;
      
              VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd,
                  sizeof(*sc->sc_ctrl_cmd), BUS_DMASYNC_PREWRITE);
              VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_mac_info,
                  VIO_CTRL_MAC_INFO_SIZE, BUS_DMASYNC_PREWRITE);
              VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status,
                  sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_PREREAD);
      
              r = virtio_enqueue_prep(vq, &slot);
              if (r != 0)
                      panic("%s: control vq busy!?", sc->sc_dev.dv_xname);
              r = virtio_enqueue_reserve(vq, slot, 4);
              if (r != 0)
                      panic("%s: control vq busy!?", sc->sc_dev.dv_xname);
              VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_cmd,
                  sizeof(*sc->sc_ctrl_cmd), 1);
              VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_mac_tbl_uc,
                  sizeof(*sc->sc_ctrl_mac_tbl_uc) +
                  sc->sc_ctrl_mac_tbl_uc->nentries * ETHER_ADDR_LEN, 1);
              VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_mac_tbl_mc,
                  sizeof(*sc->sc_ctrl_mac_tbl_mc) +
                  sc->sc_ctrl_mac_tbl_mc->nentries * ETHER_ADDR_LEN, 1);
              VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_status,
                  sizeof(*sc->sc_ctrl_status), 0);
              virtio_enqueue_commit(vsc, vq, slot, 1);
      
              if ((r = vio_wait_ctrl_done(sc)) != 0)
                      goto out;
      
              VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd,
                  sizeof(*sc->sc_ctrl_cmd), BUS_DMASYNC_POSTWRITE);
              VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_mac_info,
                  VIO_CTRL_MAC_INFO_SIZE, BUS_DMASYNC_POSTWRITE);
              VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status,
                  sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_POSTREAD);
      
              if (sc->sc_ctrl_status->ack == VIRTIO_NET_OK) {
                      r = 0;
              } else {
                      /* The host's filter table is not large enough */
                      printf("%s: failed setting rx filter\n", sc->sc_dev.dv_xname);
                      r = EIO;
              }
      
      out:
              vio_ctrl_wakeup(sc, FREE);
              return r;
      }
      
      void
      vio_iff(struct vio_softc *sc)
{
              struct virtio_softc *vsc = sc->sc_virtio;
              struct ifnet *ifp = &sc->sc_ac.ac_if;
              struct arpcom *ac = &sc->sc_ac;
              struct ether_multi *enm;
              struct ether_multistep step;
              int nentries = 0;
              int promisc = 0, allmulti = 0, rxfilter = 0;
              int r;
      
        splassert(IPL_NET);

        ifp->if_flags &= ~IFF_ALLMULTI;

        if (vsc->sc_nvqs < 3) {
                /* no ctrl vq; always promisc */
                ifp->if_flags |= IFF_ALLMULTI | IFF_PROMISC;
                      return;
              }
      
              if (sc->sc_dev.dv_cfdata->cf_flags & CONFFLAG_QEMU_VLAN_BUG)
                      ifp->if_flags |= IFF_PROMISC;
      
              if (ifp->if_flags & IFF_PROMISC || ac->ac_multirangecnt > 0 ||
                  ac->ac_multicnt >= VIRTIO_NET_CTRL_MAC_MC_ENTRIES) {
                      ifp->if_flags |= IFF_ALLMULTI;
                      if (ifp->if_flags & IFF_PROMISC)
                              promisc = 1;
                      else
                              allmulti = 1;
              } else {
                      rxfilter = 1;
      
                      ETHER_FIRST_MULTI(step, ac, enm);
                      while (enm != NULL) {
                              memcpy(sc->sc_ctrl_mac_tbl_mc->macs[nentries++],
                                  enm->enm_addrlo, ETHER_ADDR_LEN);
      
                              ETHER_NEXT_MULTI(step, enm);
                      }
              }
      
              /* set unicast address, VirtualBox wants that */
              memcpy(sc->sc_ctrl_mac_tbl_uc->macs[0], ac->ac_enaddr, ETHER_ADDR_LEN);
              sc->sc_ctrl_mac_tbl_uc->nentries = 1;
      
              sc->sc_ctrl_mac_tbl_mc->nentries = rxfilter ? nentries : 0;
      
              if (vsc->sc_nvqs < 3)
                      return;
      
              r = vio_set_rx_filter(sc);
              if (r == EIO)
                      allmulti = 1; /* fallback */
              else if (r != 0)
                      return;
      
              r = vio_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, allmulti);
              if (r == EIO)
                      promisc = 1; /* fallback */
              else if (r != 0)
                      return;
      
              vio_ctrl_rx(sc, VIRTIO_NET_CTRL_RX_PROMISC, promisc);
      }
      /*        $OpenBSD: sysv_sem.c,v 1.56 2019/02/04 07:04:28 anton Exp $        */
      /*        $NetBSD: sysv_sem.c,v 1.26 1996/02/09 19:00:25 christos Exp $        */
      
      /*
       * Copyright (c) 2002,2003 Todd C. Miller <millert@openbsd.org>
       *
       * Permission to use, copy, modify, and distribute this software for any
       * purpose with or without fee is hereby granted, provided that the above
       * copyright notice and this permission notice appear in all copies.
       *
       * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
       * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
       * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
       * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
       * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
       * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
       * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
       *
       * Sponsored in part by the Defense Advanced Research Projects
       * Agency (DARPA) and Air Force Research Laboratory, Air Force
       * Materiel Command, USAF, under agreement number F39502-99-1-0512.
       */
      /*
       * Implementation of SVID semaphores
       *
       * Author:  Daniel Boulet
       *
       * This software is provided ``AS IS'' without any warranties of any kind.
       */
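
/*
 * A minimal userland sketch of the interface implemented below
 * (hypothetical illustration, not part of this file):
 *
 *	int id = semget(IPC_PRIVATE, 1, IPC_CREAT | 0600);
 *	union semun arg;
 *	arg.val = 1;
 *	semctl(id, 0, SETVAL, arg);		(initialize to 1)
 *	struct sembuf op = { 0, -1, SEM_UNDO };
 *	semop(id, &op, 1);			(P operation; undone on exit)
 *	semctl(id, 0, IPC_RMID, arg);		(remove the set)
 */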
      
      #include <sys/param.h>
      #include <sys/systm.h>
      #include <sys/proc.h>
      #include <sys/sem.h>
      #include <sys/sysctl.h>
      #include <sys/malloc.h>
      #include <sys/pool.h>
      
      #include <sys/mount.h>
      #include <sys/syscallargs.h>
      
      #ifdef SEM_DEBUG
      #define DPRINTF(x)        printf x
      #else
      #define DPRINTF(x)
      #endif
      
      int        semtot = 0;
      int        semutot = 0;
      struct        semid_ds **sema;        /* semaphore id list */
      SLIST_HEAD(, sem_undo) semu_list; /* list of undo structures */
      struct        pool sema_pool;                /* pool for struct semid_ds */
      struct        pool semu_pool;                /* pool for struct sem_undo (SEMUSZ) */
      unsigned short *semseqs;        /* array of sem sequence numbers */
      
      struct sem_undo *semu_alloc(struct process *);
      int semundo_adjust(struct proc *, struct sem_undo **, int, int, int);
      void semundo_clear(int, int);
      
      void
      seminit(void)
      {
      
              pool_init(&sema_pool, sizeof(struct semid_ds), 0, 0, PR_WAITOK,
                  "semapl", NULL);
              pool_init(&semu_pool, SEMUSZ, 0, 0, PR_WAITOK, "semupl", NULL);
              sema = mallocarray(seminfo.semmni, sizeof(struct semid_ds *),
                  M_SEM, M_WAITOK|M_ZERO);
              semseqs = mallocarray(seminfo.semmni, sizeof(unsigned short),
                  M_SEM, M_WAITOK|M_ZERO);
              SLIST_INIT(&semu_list);
      }
      
      /*
       * Allocate a new sem_undo structure for a process
       * (returns ptr to structure or NULL if no more room)
       */
      struct sem_undo *
      semu_alloc(struct process *pr)
      {
              struct sem_undo *suptr, *sutmp;
      
              if (semutot == seminfo.semmnu)
                      return (NULL);                /* no space */
      
              /*
               * Allocate a semu w/o waiting if possible.
               * If we do have to wait, we must check to verify that a semu
               * with un_proc == pr has not been allocated in the meantime.
               */
              semutot++;
              if ((suptr = pool_get(&semu_pool, PR_NOWAIT)) == NULL) {
                      sutmp = pool_get(&semu_pool, PR_WAITOK);
                      SLIST_FOREACH(suptr, &semu_list, un_next) {
                              if (suptr->un_proc == pr) {
                                      pool_put(&semu_pool, sutmp);
                                      semutot--;
                                      return (suptr);
                              }
                      }
                      suptr = sutmp;
              }
              suptr->un_cnt = 0;
              suptr->un_proc = pr;
        SLIST_INSERT_HEAD(&semu_list, suptr, un_next);
              return (suptr);
      }
      
      /*
       * Adjust a particular entry for a particular proc
       */
      int
      semundo_adjust(struct proc *p, struct sem_undo **supptr, int semid, int semnum,
              int adjval)
{
              struct process *pr = p->p_p;
              struct sem_undo *suptr;
              struct undo *sunptr;
              int i;
      
              /*
               * Look for and remember the sem_undo if the caller doesn't provide it.
               */
              suptr = *supptr;
        if (suptr == NULL) {
                SLIST_FOREACH(suptr, &semu_list, un_next) {
                        if (suptr->un_proc == pr) {
                                      *supptr = suptr;
                                      break;
                              }
                      }
                      if (suptr == NULL) {
                              if (adjval == 0)
                                      return (0);
                        suptr = semu_alloc(p->p_p);
                        if (suptr == NULL)
                                      return (ENOSPC);
                              *supptr = suptr;
                      }
              }
      
              /*
               * Look for the requested entry and adjust it
               * (delete if adjval becomes 0).
               */
              sunptr = &suptr->un_ent[0];
        for (i = 0; i < suptr->un_cnt; i++, sunptr++) {
                if (sunptr->un_id != semid || sunptr->un_num != semnum)
                              continue;
                      if (adjval == 0)
                              sunptr->un_adjval = 0;
                      else
                              sunptr->un_adjval += adjval;
                if (sunptr->un_adjval != 0)
                              return (0);
      
                      if (--suptr->un_cnt == 0) {
                              *supptr = NULL;
                        SLIST_REMOVE(&semu_list, suptr, sem_undo, un_next);
                              pool_put(&semu_pool, suptr);
                              semutot--;
                } else if (i < suptr->un_cnt) {
                        /* keep un_ent[] dense: move the last entry into the hole */
                        suptr->un_ent[i] = suptr->un_ent[suptr->un_cnt];
                }
                      return (0);
              }
      
              /* Didn't find the right entry - create it */
              if (adjval == 0)
                      return (0);
              if (suptr->un_cnt == SEMUME)
                      return (EINVAL);
      
        sunptr = &suptr->un_ent[suptr->un_cnt];
              suptr->un_cnt++;
              sunptr->un_adjval = adjval;
              sunptr->un_id = semid;
              sunptr->un_num = semnum;
              return (0);
      }
      
      void
      semundo_clear(int semid, int semnum)
{
        struct sem_undo *suptr = SLIST_FIRST(&semu_list);
        struct sem_undo *suprev = NULL;
              struct undo *sunptr;
              int i;
      
        while (suptr != NULL) {
                sunptr = &suptr->un_ent[0];
                for (i = 0; i < suptr->un_cnt; i++, sunptr++) {
                        if (sunptr->un_id == semid) {
                                if (semnum == -1 || sunptr->un_num == semnum) {
                                        suptr->un_cnt--;
                                        if (i < suptr->un_cnt) {
                                                /* pull in the last entry and re-check this slot */
                                                suptr->un_ent[i] =
                                                  suptr->un_ent[suptr->un_cnt];
                                                i--, sunptr--;
                                        }
                                }
                                if (semnum != -1)
                                        break;
                        }
                }
                if (suptr->un_cnt == 0) {
                        struct sem_undo *sutmp = suptr;

                        if (suptr == SLIST_FIRST(&semu_list))
                                SLIST_REMOVE_HEAD(&semu_list, un_next);
                              else
                                      SLIST_REMOVE_AFTER(suprev, un_next);
                              suptr = SLIST_NEXT(suptr, un_next);
                              pool_put(&semu_pool, sutmp);
                              semutot--;
                      } else {
                              suprev = suptr;
                              suptr = SLIST_NEXT(suptr, un_next);
                      }
              }
      }
      
      int
      sys___semctl(struct proc *p, void *v, register_t *retval)
{
              struct sys___semctl_args /* {
                      syscallarg(int) semid;
                      syscallarg(int) semnum;
                      syscallarg(int) cmd;
                      syscallarg(union semun *) arg;
              } */ *uap = v;
              union semun arg;
              int error = 0, cmd = SCARG(uap, cmd);
      
        switch (cmd) {
              case IPC_SET:
              case IPC_STAT:
              case GETALL:
              case SETVAL:
              case SETALL:
                      error = copyin(SCARG(uap, arg), &arg, sizeof(arg));
                      break;
              }
        if (error == 0) {
                      error = semctl1(p, SCARG(uap, semid), SCARG(uap, semnum),
                          cmd, &arg, retval, copyin, copyout);
              }
              return (error);
      }
      
      int
      semctl1(struct proc *p, int semid, int semnum, int cmd, union semun *arg,
          register_t *retval, int (*ds_copyin)(const void *, void *, size_t),
          int (*ds_copyout)(const void *, void *, size_t))
{
              struct ucred *cred = p->p_ucred;
              int i, ix, error = 0;
              struct semid_ds sbuf;
              struct semid_ds *semaptr;
              unsigned short *semval = NULL;
      
              DPRINTF(("call to semctl(%d, %d, %d, %p)\n", semid, semnum, cmd, arg));
      
              ix = IPCID_TO_IX(semid);
        if (ix < 0 || ix >= seminfo.semmni)
                return (EINVAL);

        if ((semaptr = sema[ix]) == NULL ||
            semaptr->sem_perm.seq != IPCID_TO_SEQ(semid))
                return (EINVAL);

        switch (cmd) {
              case IPC_RMID:
                      if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_M)) != 0)
                              return (error);
                      semaptr->sem_perm.cuid = cred->cr_uid;
                      semaptr->sem_perm.uid = cred->cr_uid;
                      semtot -= semaptr->sem_nsems;
                      free(semaptr->sem_base, M_SEM,
                          semaptr->sem_nsems * sizeof(struct sem));
                      pool_put(&sema_pool, semaptr);
                      sema[ix] = NULL;
                semundo_clear(ix, -1);
                      wakeup(&sema[ix]);
                      break;
      
              case IPC_SET:
                      if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_M)))
                              return (error);
                if ((error = ds_copyin(arg->buf, &sbuf, sizeof(sbuf))) != 0)
                              return (error);
                      semaptr->sem_perm.uid = sbuf.sem_perm.uid;
                      semaptr->sem_perm.gid = sbuf.sem_perm.gid;
                      semaptr->sem_perm.mode = (semaptr->sem_perm.mode & ~0777) |
                          (sbuf.sem_perm.mode & 0777);
                      semaptr->sem_ctime = time_second;
                      break;
      
              case IPC_STAT:
                if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
                              return (error);
                      error = ds_copyout(semaptr, arg->buf, sizeof(struct semid_ds));
                      break;
      
              case GETNCNT:
                      if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
                              return (error);
                if (semnum < 0 || semnum >= semaptr->sem_nsems)
                        return (EINVAL);
                *retval = semaptr->sem_base[semnum].semncnt;
                      break;
      
              case GETPID:
                if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
                        return (error);
                if (semnum < 0 || semnum >= semaptr->sem_nsems)
                              return (EINVAL);
                      *retval = semaptr->sem_base[semnum].sempid;
                      break;
      
              case GETVAL:
                if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
                              return (error);
                      if (semnum < 0 || semnum >= semaptr->sem_nsems)
                              return (EINVAL);
                      *retval = semaptr->sem_base[semnum].semval;
                      break;
      
              case GETALL:
                      if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
                              return (error);
                for (i = 0; i < semaptr->sem_nsems; i++) {
                        error = ds_copyout(&semaptr->sem_base[i].semval,
                            &arg->array[i], sizeof(arg->array[0]));
                        if (error != 0)
                                      break;
                      }
                      break;
      
              case GETZCNT:
                      if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
                              return (error);
                      if (semnum < 0 || semnum >= semaptr->sem_nsems)
                              return (EINVAL);
                *retval = semaptr->sem_base[semnum].semzcnt;
                      break;
      
              case SETVAL:
                if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_W)))
                              return (error);
                      if (semnum < 0 || semnum >= semaptr->sem_nsems)
                              return (EINVAL);
                      if (arg->val > seminfo.semvmx)
                              return (ERANGE);
                semaptr->sem_base[semnum].semval = arg->val;
                      semundo_clear(ix, semnum);
                      wakeup(&sema[ix]);
                      break;
      
              case SETALL:
                if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_W)))
                              return (error);
                      semval = mallocarray(semaptr->sem_nsems, sizeof(arg->array[0]),
                          M_TEMP, M_WAITOK);
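                /*
                 * Copy in and range-check every value before modifying
                 * the set, so a fault or bad value leaves it untouched.
                 */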
                      for (i = 0; i < semaptr->sem_nsems; i++) {
                              error = ds_copyin(&arg->array[i], &semval[i],
                                  sizeof(arg->array[0]));
                              if (error != 0)
                                      goto error;
                              if (semval[i] > seminfo.semvmx) {
                                      error = ERANGE;
                                      goto error;
                              }
                      }
                      for (i = 0; i < semaptr->sem_nsems; i++)
                              semaptr->sem_base[i].semval = semval[i];
                      semundo_clear(ix, -1);
                      wakeup(&sema[ix]);
                      break;
      
              default:
                      return (EINVAL);
              }
      
      error:
              if (semval)
                      free(semval, M_TEMP,
                          semaptr->sem_nsems * sizeof(arg->array[0]));
      
              return (error);
      }
      
      int
      sys_semget(struct proc *p, void *v, register_t *retval)
{
              struct sys_semget_args /* {
                      syscallarg(key_t) key;
                      syscallarg(int) nsems;
                      syscallarg(int) semflg;
              } */ *uap = v;
              int semid, error;
              int key = SCARG(uap, key);
              int nsems = SCARG(uap, nsems);
              int semflg = SCARG(uap, semflg);
              struct semid_ds *semaptr, *semaptr_new = NULL;
              struct ucred *cred = p->p_ucred;
      
              DPRINTF(("semget(0x%x, %d, 0%o)\n", key, nsems, semflg));
      
              /*
               * Preallocate space for the new semaphore.  If we are going
               * to sleep, we want to sleep now to eliminate any race
               * condition in allocating a semaphore with a specific key.
               */
        if (key == IPC_PRIVATE || (semflg & IPC_CREAT)) {
                if (nsems <= 0 || nsems > seminfo.semmsl) {
                              DPRINTF(("nsems out of range (0<%d<=%d)\n", nsems,
                                  seminfo.semmsl));
                              return (EINVAL);
                      }
                if (nsems > seminfo.semmns - semtot) {
                              DPRINTF(("not enough semaphores left (need %d, got %d)\n",
                                  nsems, seminfo.semmns - semtot));
                              return (ENOSPC);
                      }
                semaptr_new = pool_get(&sema_pool, PR_WAITOK);
                      semaptr_new->sem_base = mallocarray(nsems, sizeof(struct sem),
                          M_SEM, M_WAITOK|M_ZERO);
              }
      
        if (key != IPC_PRIVATE) {
                for (semid = 0, semaptr = NULL; semid < seminfo.semmni; semid++) {
                        if ((semaptr = sema[semid]) != NULL &&
                                  semaptr->sem_perm.key == key) {
                                      DPRINTF(("found public key\n"));
                                if ((error = ipcperm(cred, &semaptr->sem_perm,
                                          semflg & 0700)))
                                              goto error;
                                if (nsems > 0 && semaptr->sem_nsems < nsems) {
                                              DPRINTF(("too small\n"));
                                              error = EINVAL;
                                              goto error;
                                      }
                                if ((semflg & IPC_CREAT) && (semflg & IPC_EXCL)) {
                                              DPRINTF(("not exclusive\n"));
                                              error = EEXIST;
                                              goto error;
                                      }
                                if (semaptr_new != NULL) {
                                        free(semaptr_new->sem_base, M_SEM,
                                                  nsems * sizeof(struct sem));
                                              pool_put(&sema_pool, semaptr_new);
                                      }
                                      goto found;
                              }
                      }
              }
      
              DPRINTF(("need to allocate the semid_ds\n"));
        if (key == IPC_PRIVATE || (semflg & IPC_CREAT)) {
                for (semid = 0; semid < seminfo.semmni; semid++) {
                        if ((semaptr = sema[semid]) == NULL)
                                      break;
                      }
                if (semid == seminfo.semmni) {
                              DPRINTF(("no more semid_ds's available\n"));
                              error = ENOSPC;
                              goto error;
                      }
                      DPRINTF(("semid %d is available\n", semid));
                semaptr_new->sem_perm.key = key;
                      semaptr_new->sem_perm.cuid = cred->cr_uid;
                      semaptr_new->sem_perm.uid = cred->cr_uid;
                      semaptr_new->sem_perm.cgid = cred->cr_gid;
                      semaptr_new->sem_perm.gid = cred->cr_gid;
                      semaptr_new->sem_perm.mode = (semflg & 0777);
                      semaptr_new->sem_perm.seq = semseqs[semid] =
                          (semseqs[semid] + 1) & 0x7fff;
                      semaptr_new->sem_nsems = nsems;
                      semaptr_new->sem_otime = 0;
                      semaptr_new->sem_ctime = time_second;
                      sema[semid] = semaptr_new;
                      semtot += nsems;
              } else {
                      DPRINTF(("didn't find it and wasn't asked to create it\n"));
                      return (ENOENT);
              }
      
      found:
              *retval = IXSEQ_TO_IPCID(semid, sema[semid]->sem_perm);
              return (0);
      error:
        if (semaptr_new != NULL) {
                free(semaptr_new->sem_base, M_SEM, nsems * sizeof(struct sem));
                      pool_put(&sema_pool, semaptr_new);
              }
              return (error);
      }
      
      int
      sys_semop(struct proc *p, void *v, register_t *retval)
{
              struct sys_semop_args /* {
                      syscallarg(int) semid;
                      syscallarg(struct sembuf *) sops;
                      syscallarg(size_t) nsops;
              } */ *uap = v;
      #define        NSOPS        8
              struct sembuf sopbuf[NSOPS];
              int semid = SCARG(uap, semid);
              size_t nsops = SCARG(uap, nsops);
              struct sembuf *sops;
              struct semid_ds *semaptr;
              struct sembuf *sopptr = NULL;
              struct sem *semptr = NULL;
              struct sem_undo *suptr = NULL;
              struct ucred *cred = p->p_ucred;
        size_t i, j;
              int do_wakeup, do_undos, error;
      
              DPRINTF(("call to semop(%d, %p, %lu)\n", semid, SCARG(uap, sops),
                  (u_long)nsops));
      
              semid = IPCID_TO_IX(semid);        /* Convert back to zero origin */
      
        if (semid < 0 || semid >= seminfo.semmni)
                      return (EINVAL);
      
        if ((semaptr = sema[semid]) == NULL ||
                  semaptr->sem_perm.seq != IPCID_TO_SEQ(SCARG(uap, semid)))
                      return (EINVAL);
      
              if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_W))) {
                      DPRINTF(("error = %d from ipaccess\n", error));
                      return (error);
              }
      
              if (nsops == 0) {
                *retval = 0;
                      return (0);
              } else if (nsops > (size_t)seminfo.semopm) {
                      DPRINTF(("too many sops (max=%d, nsops=%lu)\n", seminfo.semopm,
                          (u_long)nsops));
                      return (E2BIG);
              }
      
              if (nsops <= NSOPS)
                sops = sopbuf;
        else
                sops = mallocarray(nsops, sizeof(struct sembuf), M_SEM, M_WAITOK);
              error = copyin(SCARG(uap, sops), sops, nsops * sizeof(struct sembuf));
              if (error != 0) {
                      DPRINTF(("error = %d from copyin(%p, %p, %u)\n", error,
                          SCARG(uap, sops), &sops, nsops * sizeof(struct sembuf)));
                      goto done2;
              }
      
              /* 
               * Loop trying to satisfy the vector of requests.
               * If we reach a point where we must wait, any requests already
               * performed are rolled back and we go to sleep until some other
               * process wakes us up.  At this point, we start all over again.
               *
               * This ensures that from the perspective of other tasks, a set
               * of requests is atomic (never partially satisfied).
               */
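        /*
         * For example, a two-op vector that decrements semaphores 0 and 1
         * either decrements both or neither: if the second op cannot be
         * satisfied, the first is rolled back before the process sleeps.
         */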
              do_undos = 0;
      
              for (;;) {
                      do_wakeup = 0;
      
                for (i = 0; i < nsops; i++) {
                              sopptr = &sops[i];
      
                              if (sopptr->sem_num >= semaptr->sem_nsems) {
                                      error = EFBIG;
                                      goto done2;
                              }
      
                              semptr = &semaptr->sem_base[sopptr->sem_num];
      
                              DPRINTF(("semop:  semaptr=%x, sem_base=%x, semptr=%x, sem[%d]=%d : op=%d, flag=%s\n",
                                  semaptr, semaptr->sem_base, semptr,
                                  sopptr->sem_num, semptr->semval, sopptr->sem_op,
                                  (sopptr->sem_flg & IPC_NOWAIT) ? "nowait" : "wait"));
      
                              if (sopptr->sem_op < 0) {
                                if ((int)(semptr->semval +
                                                sopptr->sem_op) < 0) {
                                              DPRINTF(("semop:  can't do it now\n"));
                                              break;
                                      } else {
                                              semptr->semval += sopptr->sem_op;
                                        if (semptr->semval == 0 &&
                                            semptr->semzcnt > 0)
                                                      do_wakeup = 1;
                                      }
                                      if (sopptr->sem_flg & SEM_UNDO)
                                              do_undos++;
                              } else if (sopptr->sem_op == 0) {
                                if (semptr->semval > 0) {
                                              DPRINTF(("semop:  not zero now\n"));
                                              break;
                                      }
                              } else {
                                if (semptr->semncnt > 0)
                                              do_wakeup = 1;
                                      semptr->semval += sopptr->sem_op;
                                      if (sopptr->sem_flg & SEM_UNDO)
                                              do_undos++;
                              }
                      }
      
                      /*
                       * Did we get through the entire vector and can we undo it?
                       */
                      if (i >= nsops && do_undos <= SEMUME)
                              goto done;
      
                      /*
                       * No ... rollback anything that we've already done
                       */
                      DPRINTF(("semop:  rollback 0 through %d\n", i - 1));
                for (j = 0; j < i; j++)
                        semaptr->sem_base[sops[j].sem_num].semval -=
                            sops[j].sem_op;
      
                /*
                 * Did we have too many SEM_UNDOs?
                 */
                if (do_undos > SEMUME) {
                              error = ENOSPC;
                              goto done2;
                      }
      
                      /*
                       * If the request that we couldn't satisfy has the
                       * NOWAIT flag set then return with EAGAIN.
                       */
                if (sopptr->sem_flg & IPC_NOWAIT) {
                              error = EAGAIN;
                              goto done2;
                      }
      
                if (sopptr->sem_op == 0)
                        semptr->semzcnt++;
                else
                        semptr->semncnt++;
      
                      DPRINTF(("semop:  good night!\n"));
                      error = tsleep(&sema[semid], PLOCK | PCATCH,
                          "semwait", 0);
                      DPRINTF(("semop:  good morning (error=%d)!\n", error));
      
                suptr = NULL;        /* sem_undo may have been reallocated */
      
                      /*
                       * Make sure that the semaphore still exists
                       */
                if (sema[semid] == NULL ||
                          semaptr->sem_perm.seq != IPCID_TO_SEQ(SCARG(uap, semid))) {
                              error = EIDRM;
                              goto done2;
                      }
      
                      /*
                       * The semaphore is still alive.  Readjust the count of
                       * waiting processes.
                       */
                      if (sopptr->sem_op == 0)
                              semptr->semzcnt--;
                      else
                        semptr->semncnt--;
      
                      /*
                       * Is it really morning, or was our sleep interrupted?
                       * (Delayed check of tsleep() return code because we
                       * need to decrement sem[nz]cnt either way.)
                       */
                if (error != 0) {
                              error = EINTR;
                              goto done2;
                      }
                      DPRINTF(("semop:  good morning!\n"));
              }
      
      done:
              /*
               * Process any SEM_UNDO requests.
               */
        if (do_undos) {
                for (i = 0; i < nsops; i++) {
                              /*
                               * We only need to deal with SEM_UNDO's for non-zero
                               * op's.
                               */
                              int adjval;
      
                        if ((sops[i].sem_flg & SEM_UNDO) == 0)
                                continue;
                        adjval = sops[i].sem_op;
                        if (adjval == 0)
                                continue;
                        error = semundo_adjust(p, &suptr, semid,
                            sops[i].sem_num, -adjval);
                        if (error == 0)
                                continue;
                                      continue;
      
                              /*
                               * Uh-Oh!  We ran out of either sem_undo's or undo's.
                               * Rollback the adjustments to this point and then
                               * rollback the semaphore ups and down so we can return
                               * with an error with all structures restored.  We
                               * rollback the undo's in the exact reverse order that
                               * we applied them.  This guarantees that we won't run
                               * out of space as we roll things back out.
                               */
                              for (j = i; j > 0;) {
                                      j--;
                                      if ((sops[j].sem_flg & SEM_UNDO) == 0)
                                              continue;
                                      adjval = sops[j].sem_op;
                                      if (adjval == 0)
                                              continue;
                                      if (semundo_adjust(p, &suptr, semid,
                                          sops[j].sem_num, adjval) != 0)
                                              panic("semop - can't undo undos");
                              }
      
                              for (j = 0; j < nsops; j++)
                                      semaptr->sem_base[sops[j].sem_num].semval -=
                                          sops[j].sem_op;
      
                              DPRINTF(("error = %d from semundo_adjust\n", error));
                              goto done2;
                      } /* loop through the sops */
              } /* if (do_undos) */
      
              /* We're definitely done - set the sempid's */
        for (i = 0; i < nsops; i++) {
                      sopptr = &sops[i];
                      semptr = &semaptr->sem_base[sopptr->sem_num];
                      semptr->sempid = p->p_p->ps_pid;
              }
      
              semaptr->sem_otime = time_second;
      
              /* Do a wakeup if any semaphore was up'd. */
        if (do_wakeup) {
                      DPRINTF(("semop:  doing wakeup\n"));
                      wakeup(&sema[semid]);
                      DPRINTF(("semop:  back from wakeup\n"));
              }
              DPRINTF(("semop:  done\n"));
              *retval = 0;
      done2:
        if (sops != sopbuf)
                free(sops, M_SEM, nsops * sizeof(struct sembuf));
              return (error);
      }
      
      /*
       * Go through the undo structures for this process and apply the adjustments to
       * semaphores.
       */
      void
      semexit(struct process *pr)
      {
              struct sem_undo *suptr;
              struct sem_undo **supptr;
      
              /*
               * Go through the chain of undo vectors looking for one associated with
               * this process.  Remember the pointer to the pointer to the element
               * to dequeue it later.
               */
              supptr = &SLIST_FIRST(&semu_list);
              SLIST_FOREACH(suptr, &semu_list, un_next) {
                      if (suptr->un_proc == pr)
                              break;
                      supptr = &SLIST_NEXT(suptr, un_next);
              }
      
        /*
         * If there is no undo vector, there is nothing to do.
         */
              if (suptr == NULL)
                      return;
      
              /*
               * We now have an undo vector for this process.
               */
              DPRINTF(("process @%p has undo structure with %d entries\n", pr,
                  suptr->un_cnt));
      
              /*
               * If there are any active undo elements then process them.
               */
              if (suptr->un_cnt > 0) {
                      int ix;
      
                      for (ix = 0; ix < suptr->un_cnt; ix++) {
                              int semid = suptr->un_ent[ix].un_id;
                              int semnum = suptr->un_ent[ix].un_num;
                              int adjval = suptr->un_ent[ix].un_adjval;
                              struct semid_ds *semaptr;
      
                              if ((semaptr = sema[semid]) == NULL)
                                      panic("semexit - semid not allocated");
                              if (semnum >= semaptr->sem_nsems)
                                      panic("semexit - semnum out of range");
      
                              DPRINTF(("semexit:  %p id=%d num=%d(adj=%d) ; sem=%d\n",
                                  suptr->un_proc, suptr->un_ent[ix].un_id,
                                  suptr->un_ent[ix].un_num,
                                  suptr->un_ent[ix].un_adjval,
                                  semaptr->sem_base[semnum].semval));
      
                              if (adjval < 0 &&
                                  semaptr->sem_base[semnum].semval < -adjval)
                                      semaptr->sem_base[semnum].semval = 0;
                              else
                                      semaptr->sem_base[semnum].semval += adjval;
      
                              wakeup(&sema[semid]);
                              DPRINTF(("semexit:  back from wakeup\n"));
                      }
              }
      
              /*
               * Deallocate the undo vector.
               */
              DPRINTF(("removing vector\n"));
              *supptr = SLIST_NEXT(suptr, un_next);
              pool_put(&semu_pool, suptr);
              semutot--;
      }
      
      /*
       * Userland access to struct seminfo.
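 *
 * Illustrative sketch: these map to the kern.seminfo.* sysctl tree, so
 * the tunable limits can be inspected or adjusted from userland, e.g.
 * (value hypothetical):
 *
 *	# sysctl kern.seminfo.semmni=128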
       */
      int
      sysctl_sysvsem(int *name, u_int namelen, void *oldp, size_t *oldlenp,
              void *newp, size_t newlen)
      {
              int error, val;
              struct semid_ds **sema_new;
              unsigned short *newseqs;
      
              if (namelen != 2) {
                      switch (name[0]) {
                      case KERN_SEMINFO_SEMMNI:
                      case KERN_SEMINFO_SEMMNS:
                      case KERN_SEMINFO_SEMMNU:
                      case KERN_SEMINFO_SEMMSL:
                      case KERN_SEMINFO_SEMOPM:
                      case KERN_SEMINFO_SEMUME:
                      case KERN_SEMINFO_SEMUSZ:
                      case KERN_SEMINFO_SEMVMX:
                      case KERN_SEMINFO_SEMAEM:
                              break;
                      default:
                              return (ENOTDIR);       /* overloaded */
                      }
              }
      
              switch (name[0]) {
              case KERN_SEMINFO_SEMMNI:
                      val = seminfo.semmni;
                      if ((error = sysctl_int(oldp, oldlenp, newp, newlen, &val)) ||
                          val == seminfo.semmni)
                              return (error);
      
                      if (val < seminfo.semmni || val > 0xffff)
                              return (EINVAL);
      
                /* Expand the sema and semseqs arrays */
                      sema_new = mallocarray(val, sizeof(struct semid_ds *),
                          M_SEM, M_WAITOK|M_ZERO);
                      memcpy(sema_new, sema,
                          seminfo.semmni * sizeof(struct semid_ds *));
                      newseqs = mallocarray(val, sizeof(unsigned short), M_SEM,
                          M_WAITOK|M_ZERO);
                      memcpy(newseqs, semseqs,
                          seminfo.semmni * sizeof(unsigned short));
                      free(sema, M_SEM, seminfo.semmni * sizeof(struct semid_ds *));
                      free(semseqs, M_SEM, seminfo.semmni * sizeof(unsigned short));
                      sema = sema_new;
                      semseqs = newseqs;
                      seminfo.semmni = val;
                      return (0);
              case KERN_SEMINFO_SEMMNS:
                      val = seminfo.semmns;
                      if ((error = sysctl_int(oldp, oldlenp, newp, newlen, &val)) ||
                          val == seminfo.semmns)
                              return (error);
                      if (val < seminfo.semmns || val > 0xffff)
                              return (EINVAL);        /* can't decrease semmns */
                      seminfo.semmns = val;
                      return (0);
              case KERN_SEMINFO_SEMMNU:
                      val = seminfo.semmnu;
                      if ((error = sysctl_int(oldp, oldlenp, newp, newlen, &val)) ||
                          val == seminfo.semmnu)
                              return (error);
                      if (val < seminfo.semmnu)
                              return (EINVAL);        /* can't decrease semmnu */
                      seminfo.semmnu = val;
                      return (0);
              case KERN_SEMINFO_SEMMSL:
                      val = seminfo.semmsl;
                      if ((error = sysctl_int(oldp, oldlenp, newp, newlen, &val)) ||
                          val == seminfo.semmsl)
                              return (error);
                      if (val < seminfo.semmsl || val > 0xffff)
                              return (EINVAL);        /* can't decrease semmsl */
                      seminfo.semmsl = val;
                      return (0);
              case KERN_SEMINFO_SEMOPM:
                      val = seminfo.semopm;
                      if ((error = sysctl_int(oldp, oldlenp, newp, newlen, &val)) ||
                          val == seminfo.semopm)
                              return (error);
                      if (val <= 0)
                              return (EINVAL);        /* semopm must be >= 1 */
                      seminfo.semopm = val;
                      return (0);
              case KERN_SEMINFO_SEMUME:
                      return (sysctl_rdint(oldp, oldlenp, newp, seminfo.semume));
              case KERN_SEMINFO_SEMUSZ:
                      return (sysctl_rdint(oldp, oldlenp, newp, seminfo.semusz));
              case KERN_SEMINFO_SEMVMX:
                      return (sysctl_rdint(oldp, oldlenp, newp, seminfo.semvmx));
              case KERN_SEMINFO_SEMAEM:
                      return (sysctl_rdint(oldp, oldlenp, newp, seminfo.semaem));
              default:
                      return (EOPNOTSUPP);
              }
              /* NOTREACHED */
      }
      /*        $OpenBSD: subr_pool.c,v 1.228 2019/07/19 09:03:03 bluhm Exp $        */
      /*        $NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $        */
      
      /*-
       * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
       * All rights reserved.
       *
       * This code is derived from software contributed to The NetBSD Foundation
       * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
       * Simulation Facility, NASA Ames Research Center.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       *
       * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
       * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
       * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
       * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
       * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
       * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
       * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
       * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
       * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
       * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
       * POSSIBILITY OF SUCH DAMAGE.
       */
      
      #include <sys/param.h>
      #include <sys/systm.h>
      #include <sys/errno.h>
      #include <sys/kernel.h>
      #include <sys/malloc.h>
      #include <sys/pool.h>
      #include <sys/proc.h>
      #include <sys/syslog.h>
      #include <sys/sysctl.h>
      #include <sys/task.h>
      #include <sys/timeout.h>
      #include <sys/percpu.h>
      
      #include <uvm/uvm_extern.h>
      
      /*
       * Pool resource management utility.
       *
       * Memory is allocated in pages which are split into pieces according to
       * the pool item size. Each page is kept on one of three lists in the
       * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
       * for empty, full and partially-full pages respectively. The individual
       * pool items are on a linked list headed by `ph_items' in each page
       * header. The memory for building the page list is either taken from
       * the allocated pages themselves (for small pool items) or taken from
       * an internal pool of page headers (`phpool').
       */
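
/*
 * Illustrative sketch (not in-tree code): the typical lifecycle of a
 * pool, assuming a caller-defined "struct foo":
 *
 *	struct pool foo_pool;
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, IPL_NONE, 0,
 *	    "foopl", NULL);
 *
 *	f = pool_get(&foo_pool, PR_WAITOK | PR_ZERO);
 *	...
 *	pool_put(&foo_pool, f);
 */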
      
      /* List of all pools */
      SIMPLEQ_HEAD(,pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head);
      
      /*
       * Every pool gets a unique serial number assigned to it. If this counter
       * wraps, we're screwed, but we shouldn't create so many pools anyway.
       */
      unsigned int pool_serial;
      unsigned int pool_count;
      
/* Protects the variables above that make up the global pool state */
      struct rwlock pool_lock = RWLOCK_INITIALIZER("pools");
      
      /* Private pool for page header structures */
      struct pool phpool;
      
      struct pool_lock_ops {
              void        (*pl_init)(struct pool *, union pool_lock *,
                          const struct lock_type *);
              void        (*pl_enter)(union pool_lock *);
              int        (*pl_enter_try)(union pool_lock *);
              void        (*pl_leave)(union pool_lock *);
              void        (*pl_assert_locked)(union pool_lock *);
              void        (*pl_assert_unlocked)(union pool_lock *);
              int        (*pl_sleep)(void *, union pool_lock *, int, const char *, int);
      };
      
      static const struct pool_lock_ops pool_lock_ops_mtx;
      static const struct pool_lock_ops pool_lock_ops_rw;
      
      #ifdef WITNESS
      #define pl_init(pp, pl) do {                                                \
              static const struct lock_type __lock_type = { .lt_name = #pl };        \
              (pp)->pr_lock_ops->pl_init(pp, pl, &__lock_type);                \
      } while (0)
      #else /* WITNESS */
      #define pl_init(pp, pl)                (pp)->pr_lock_ops->pl_init(pp, pl, NULL)
      #endif /* WITNESS */
      
      static inline void
      pl_enter(struct pool *pp, union pool_lock *pl)
      {
              pp->pr_lock_ops->pl_enter(pl);
      }
      static inline int
      pl_enter_try(struct pool *pp, union pool_lock *pl)
      {
              return pp->pr_lock_ops->pl_enter_try(pl);
      }
      static inline void
      pl_leave(struct pool *pp, union pool_lock *pl)
      {
              pp->pr_lock_ops->pl_leave(pl);
      }
      static inline void
      pl_assert_locked(struct pool *pp, union pool_lock *pl)
      {
              pp->pr_lock_ops->pl_assert_locked(pl);
      }
      static inline void
      pl_assert_unlocked(struct pool *pp, union pool_lock *pl)
      {
              pp->pr_lock_ops->pl_assert_unlocked(pl);
      }
      static inline int
      pl_sleep(struct pool *pp, void *ident, union pool_lock *lock, int priority,
          const char *wmesg, int timo)
      {
              return pp->pr_lock_ops->pl_sleep(ident, lock, priority, wmesg, timo);
      }
      
      struct pool_item {
              u_long                                pi_magic;
              XSIMPLEQ_ENTRY(pool_item)        pi_list;
      };
      #define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic)
      
      struct pool_page_header {
              /* Page headers */
              TAILQ_ENTRY(pool_page_header)
                                      ph_entry;        /* pool page list */
              XSIMPLEQ_HEAD(, pool_item)
                                      ph_items;        /* free items on the page */
              RBT_ENTRY(pool_page_header)
                                      ph_node;        /* off-page page headers */
              unsigned int                ph_nmissing;        /* # of chunks in use */
              caddr_t                        ph_page;        /* this page's address */
              caddr_t                        ph_colored;        /* page's colored address */
              unsigned long                ph_magic;
              int                        ph_tick;
      };
      #define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
      #define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)
      
      #ifdef MULTIPROCESSOR
      struct pool_cache_item {
              struct pool_cache_item        *ci_next;        /* next item in list */
              unsigned long                 ci_nitems;        /* number of items in list */
              TAILQ_ENTRY(pool_cache_item)
                                       ci_nextl;        /* entry in list of lists */
      };
      
      /* we store whether the cached item is poisoned in the high bit of nitems */
      #define POOL_CACHE_ITEM_NITEMS_MASK        0x7ffffffUL
      #define POOL_CACHE_ITEM_NITEMS_POISON        0x8000000UL
      
      #define POOL_CACHE_ITEM_NITEMS(_ci)                                        \
          ((_ci)->ci_nitems & POOL_CACHE_ITEM_NITEMS_MASK)
      
      #define POOL_CACHE_ITEM_POISONED(_ci)                                        \
          ISSET((_ci)->ci_nitems, POOL_CACHE_ITEM_NITEMS_POISON)
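
/*
 * A ci_nitems value thus packs both fields: a poisoned list of, say,
 * three items is stored as (3 | POOL_CACHE_ITEM_NITEMS_POISON).
 */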
      
      struct pool_cache {
              struct pool_cache_item        *pc_actv;        /* active list of items */
              unsigned long                 pc_nactv;        /* actv head nitems cache */
              struct pool_cache_item        *pc_prev;        /* previous list of items */
      
              uint64_t                 pc_gen;        /* generation number */
              uint64_t                 pc_nget;        /* # of successful requests */
              uint64_t                 pc_nfail;        /* # of unsuccessful reqs */
              uint64_t                 pc_nput;        /* # of releases */
              uint64_t                 pc_nlget;        /* # of list requests */
              uint64_t                 pc_nlfail;        /* # of fails getting a list */
              uint64_t                 pc_nlput;        /* # of list releases */
      
              int                         pc_nout;
      };
      
      void        *pool_cache_get(struct pool *);
      void         pool_cache_put(struct pool *, void *);
      void         pool_cache_destroy(struct pool *);
      void         pool_cache_gc(struct pool *);
      #endif
      void         pool_cache_pool_info(struct pool *, struct kinfo_pool *);
      int         pool_cache_info(struct pool *, void *, size_t *);
      int         pool_cache_cpus_info(struct pool *, void *, size_t *);
      
      #ifdef POOL_DEBUG
      int        pool_debug = 1;
      #else
      int        pool_debug = 0;
      #endif
      
      #define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0)
      
      struct pool_page_header *
               pool_p_alloc(struct pool *, int, int *);
      void         pool_p_insert(struct pool *, struct pool_page_header *);
      void         pool_p_remove(struct pool *, struct pool_page_header *);
      void         pool_p_free(struct pool *, struct pool_page_header *);
      
      void         pool_update_curpage(struct pool *);
      void        *pool_do_get(struct pool *, int, int *);
      void         pool_do_put(struct pool *, void *);
      int         pool_chk_page(struct pool *, struct pool_page_header *, int);
      int         pool_chk(struct pool *);
      void         pool_get_done(struct pool *, void *, void *);
      void         pool_runqueue(struct pool *, int);
      
      void        *pool_allocator_alloc(struct pool *, int, int *);
      void         pool_allocator_free(struct pool *, void *);
      
      /*
       * The default pool allocator.
       */
      void        *pool_page_alloc(struct pool *, int, int *);
      void        pool_page_free(struct pool *, void *);
      
      /*
       * safe for interrupts; this is the default allocator
       */
      struct pool_allocator pool_allocator_single = {
              pool_page_alloc,
              pool_page_free,
              POOL_ALLOC_SIZE(PAGE_SIZE, POOL_ALLOC_ALIGNED)
      };
      
      void        *pool_multi_alloc(struct pool *, int, int *);
      void        pool_multi_free(struct pool *, void *);
      
      struct pool_allocator pool_allocator_multi = {
              pool_multi_alloc,
              pool_multi_free,
              POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
      };
      
      void        *pool_multi_alloc_ni(struct pool *, int, int *);
      void        pool_multi_free_ni(struct pool *, void *);
      
      struct pool_allocator pool_allocator_multi_ni = {
              pool_multi_alloc_ni,
              pool_multi_free_ni,
              POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
      };
      
      #ifdef DDB
      void         pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...)
                   __attribute__((__format__(__kprintf__,1,2))));
      void         pool_print1(struct pool *, const char *, int (*)(const char *, ...)
                   __attribute__((__format__(__kprintf__,1,2))));
      #endif
      
      /* stale page garbage collectors */
      void        pool_gc_sched(void *);
      struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
      void        pool_gc_pages(void *);
      struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL);
      int pool_wait_free = 1;
      int pool_wait_gc = 8;
      
RBT_PROTOTYPE(phtree, pool_page_header, ph_node, phtree_compare);
      
      static inline int
      phtree_compare(const struct pool_page_header *a,
          const struct pool_page_header *b)
      {
              vaddr_t va = (vaddr_t)a->ph_page;
              vaddr_t vb = (vaddr_t)b->ph_page;
      
              /* the compares in this order are important for the NFIND to work */
              if (vb < va)
                      return (-1);
              if (vb > va)
                      return (1);
      
              return (0);
      }
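
/*
 * Note that phtree_compare() sorts the tree in descending address order,
 * so an RBT_NFIND() on an item's address lands on the header whose page
 * starts at or below that address, i.e. the page the item belongs to.
 * pr_find_pagehead() relies on this.
 */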
      
RBT_GENERATE(phtree, pool_page_header, ph_node, phtree_compare);
      
      /*
       * Return the pool page header based on page address.
       */
      static inline struct pool_page_header *
      pr_find_pagehead(struct pool *pp, void *v)
      {
              struct pool_page_header *ph, key;
      
              if (POOL_INPGHDR(pp)) {
                      caddr_t page;
      
                page = (caddr_t)((vaddr_t)v & pp->pr_pgmask);
      
                      return ((struct pool_page_header *)(page + pp->pr_phoffset));
              }
      
              key.ph_page = v;
              ph = RBT_NFIND(phtree, &pp->pr_phtree, &key);
              if (ph == NULL)
                      panic("%s: %s: page header missing", __func__, pp->pr_wchan);
      
              KASSERT(ph->ph_page <= (caddr_t)v);
        if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v)
                      panic("%s: %s: incorrect page", __func__, pp->pr_wchan);
      
              return (ph);
      }
      
      /*
       * Initialize the given pool resource structure.
       *
       * We export this routine to allow other kernel parts to declare
       * static pools that must be initialized before malloc() is available.
       */
      void
      pool_init(struct pool *pp, size_t size, u_int align, int ipl, int flags,
          const char *wchan, struct pool_allocator *palloc)
{
              int off = 0, space;
              unsigned int pgsize = PAGE_SIZE, items;
              size_t pa_pagesz;
      #ifdef DIAGNOSTIC
              struct pool *iter;
      #endif
      
              if (align == 0)
                      align = ALIGN(1);
      
              if (size < sizeof(struct pool_item))
                      size = sizeof(struct pool_item);
      
              size = roundup(size, align);
      
        while (size * 8 > pgsize)
                      pgsize <<= 1;
      
        if (palloc == NULL) {
                if (pgsize > PAGE_SIZE) {
                        palloc = ISSET(flags, PR_WAITOK) ?
                            &pool_allocator_multi_ni : &pool_allocator_multi;
                } else
                        palloc = &pool_allocator_single;
      
                      pa_pagesz = palloc->pa_pagesz;
              } else {
                      size_t pgsizes;
      
                      pa_pagesz = palloc->pa_pagesz;
                      if (pa_pagesz == 0)
                              pa_pagesz = POOL_ALLOC_DEFAULT;
      
                      pgsizes = pa_pagesz & ~POOL_ALLOC_ALIGNED;
      
                      /* make sure the allocator can fit at least one item */
                      if (size > pgsizes) {
                              panic("%s: pool %s item size 0x%zx > "
                                  "allocator %p sizes 0x%zx", __func__, wchan,
                                  size, palloc, pgsizes);
                      }
      
                      /* shrink pgsize until it fits into the range */
                      while (!ISSET(pgsizes, pgsize))
                              pgsize >>= 1;
              }
              KASSERT(ISSET(pa_pagesz, pgsize));
      
              items = pgsize / size;
      
              /*
               * Decide whether to put the page header off page to avoid
               * wasting too large a part of the page. Off-page page headers
               * go into an RB tree, so we can match a returned item with
               * its header based on the page address.
               */
              if (ISSET(pa_pagesz, POOL_ALLOC_ALIGNED)) {
                      if (pgsize - (size * items) >
                          sizeof(struct pool_page_header)) {
                        off = pgsize - sizeof(struct pool_page_header);
                } else if (sizeof(struct pool_page_header) * 2 >= size) {
                        off = pgsize - sizeof(struct pool_page_header);
                              items = off / size;
                      }
              }
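
        /*
         * In short: if the slack left after the items is large enough,
         * the header rides in that slack for free; if the items are
         * small (comparable to the header itself), one item's worth of
         * space is sacrificed to keep the header in-page; otherwise the
         * header is allocated from phpool and found via the pr_phtree
         * RB tree.  Unaligned allocators always take the off-page path.
         */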
      
              KASSERT(items > 0);
      
              /*
               * Initialize the pool structure.
               */
              memset(pp, 0, sizeof(*pp));
              if (ISSET(flags, PR_RWLOCK)) {
                      KASSERT(flags & PR_WAITOK);
                      pp->pr_lock_ops = &pool_lock_ops_rw;
              } else
                pp->pr_lock_ops = &pool_lock_ops_mtx;
              TAILQ_INIT(&pp->pr_emptypages);
              TAILQ_INIT(&pp->pr_fullpages);
              TAILQ_INIT(&pp->pr_partpages);
              pp->pr_curpage = NULL;
              pp->pr_npages = 0;
              pp->pr_minitems = 0;
              pp->pr_minpages = 0;
              pp->pr_maxpages = 8;
              pp->pr_size = size;
              pp->pr_pgsize = pgsize;
              pp->pr_pgmask = ~0UL ^ (pgsize - 1);
              pp->pr_phoffset = off;
              pp->pr_itemsperpage = items;
              pp->pr_wchan = wchan;
              pp->pr_alloc = palloc;
              pp->pr_nitems = 0;
              pp->pr_nout = 0;
              pp->pr_hardlimit = UINT_MAX;
              pp->pr_hardlimit_warning = NULL;
              pp->pr_hardlimit_ratecap.tv_sec = 0;
              pp->pr_hardlimit_ratecap.tv_usec = 0;
              pp->pr_hardlimit_warning_last.tv_sec = 0;
              pp->pr_hardlimit_warning_last.tv_usec = 0;
              RBT_INIT(phtree, &pp->pr_phtree);
      
              /*
               * Use the space between the chunks and the page header
               * for cache coloring.
               */
              space = POOL_INPGHDR(pp) ? pp->pr_phoffset : pp->pr_pgsize;
              space -= pp->pr_itemsperpage * pp->pr_size;
              pp->pr_align = align;
              pp->pr_maxcolors = (space / align) + 1;
      
              pp->pr_nget = 0;
              pp->pr_nfail = 0;
              pp->pr_nput = 0;
              pp->pr_npagealloc = 0;
              pp->pr_npagefree = 0;
              pp->pr_hiwat = 0;
              pp->pr_nidle = 0;
      
              pp->pr_ipl = ipl;
              pp->pr_flags = flags;
      
              pl_init(pp, &pp->pr_lock);
              pl_init(pp, &pp->pr_requests_lock);
              TAILQ_INIT(&pp->pr_requests);
      
        if (phpool.pr_size == 0) {
                      pool_init(&phpool, sizeof(struct pool_page_header), 0,
                          IPL_HIGH, 0, "phpool", NULL);
      
                /* make sure phpool won't "recurse" */
                      KASSERT(POOL_INPGHDR(&phpool));
              }
      
              /* pglistalloc/constraint parameters */
              pp->pr_crange = &kp_dirty;
      
              /* Insert this into the list of all pools. */
              rw_enter_write(&pool_lock);
      #ifdef DIAGNOSTIC
        SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
                      if (iter == pp)
                              panic("%s: pool %s already on list", __func__, wchan);
              }
      #endif
      
              pp->pr_serial = ++pool_serial;
              if (pool_serial == 0)
                      panic("%s: too much uptime", __func__);
      
        SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
              pool_count++;
              rw_exit_write(&pool_lock);
      }
      
      /*
       * Decommission a pool resource.
       */
      void
      pool_destroy(struct pool *pp)
{
              struct pool_page_header *ph;
              struct pool *prev, *iter;
      
      #ifdef MULTIPROCESSOR
              if (pp->pr_cache != NULL)
                      pool_cache_destroy(pp);
      #endif
      
      #ifdef DIAGNOSTIC
              if (pp->pr_nout != 0)
                      panic("%s: pool busy: still out: %u", __func__, pp->pr_nout);
      #endif
      
              /* Remove from global pool list */
              rw_enter_write(&pool_lock);
              pool_count--;
              if (pp == SIMPLEQ_FIRST(&pool_head))
                SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist);
              else {
                      prev = SIMPLEQ_FIRST(&pool_head);
                      SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
                              if (iter == pp) {
                                      SIMPLEQ_REMOVE_AFTER(&pool_head, prev,
                                          pr_poollist);
                                      break;
                              }
                              prev = iter;
                      }
              }
              rw_exit_write(&pool_lock);
      
              /* Remove all pages */
        while ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) {
                pl_enter(pp, &pp->pr_lock);
                pool_p_remove(pp, ph);
                      pl_leave(pp, &pp->pr_lock);
                      pool_p_free(pp, ph);
              }
              KASSERT(TAILQ_EMPTY(&pp->pr_fullpages));
              KASSERT(TAILQ_EMPTY(&pp->pr_partpages));
}
      
      void
      pool_request_init(struct pool_request *pr,
          void (*handler)(struct pool *, void *, void *), void *cookie)
      {
              pr->pr_handler = handler;
              pr->pr_cookie = cookie;
              pr->pr_item = NULL;
      }
      
      void
      pool_request(struct pool *pp, struct pool_request *pr)
      {
              pl_enter(pp, &pp->pr_requests_lock);
              TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
              pool_runqueue(pp, PR_NOWAIT);
              pl_leave(pp, &pp->pr_requests_lock);
      }
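
/*
 * Illustrative sketch (not in-tree code): the asynchronous interface.
 * A caller that must not sleep can queue a request and have the item
 * delivered to its handler once memory becomes available.  The handler
 * and softc below are hypothetical:
 *
 *	void
 *	foo_handler(struct pool *pp, void *cookie, void *item)
 *	{
 *		struct foo_softc *sc = cookie;
 *
 *		sc->sc_buf = item;
 *	}
 *
 *	pool_request_init(&sc->sc_request, foo_handler, sc);
 *	pool_request(foo_pool, &sc->sc_request);
 */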
      
      struct pool_get_memory {
              union pool_lock lock;
              void * volatile v;
      };
      
      /*
       * Grab an item from the pool.
       */
      void *
      pool_get(struct pool *pp, int flags)
{
              void *v = NULL;
              int slowdown = 0;
      
              KASSERT(flags & (PR_WAITOK | PR_NOWAIT));
              if (pp->pr_flags & PR_RWLOCK)
                      KASSERT(flags & PR_WAITOK);
      
      #ifdef MULTIPROCESSOR
              if (pp->pr_cache != NULL) {
                      v = pool_cache_get(pp);
                      if (v != NULL)
                              goto good;
              }
      #endif
      
              pl_enter(pp, &pp->pr_lock);
              if (pp->pr_nout >= pp->pr_hardlimit) {
                      if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL))
                              goto fail;
        } else if ((v = pool_do_get(pp, flags, &slowdown)) == NULL) {
                      if (ISSET(flags, PR_NOWAIT))
                              goto fail;
              }
              pl_leave(pp, &pp->pr_lock);
      
        if ((slowdown || pool_debug == 2) && ISSET(flags, PR_WAITOK))
                      yield();
      
        if (v == NULL) {
                      struct pool_get_memory mem = { .v = NULL };
                      struct pool_request pr;
      
      #ifdef DIAGNOSTIC
                      if (ISSET(flags, PR_WAITOK) && curproc == &proc0)
                              panic("%s: cannot sleep for memory during boot",
                                  __func__);
      #endif
                      pl_init(pp, &mem.lock);
                      pool_request_init(&pr, pool_get_done, &mem);
                      pool_request(pp, &pr);
      
                      pl_enter(pp, &mem.lock);
                      while (mem.v == NULL)
                              pl_sleep(pp, &mem, &mem.lock, PSWP, pp->pr_wchan, 0);
                      pl_leave(pp, &mem.lock);
      
                      v = mem.v;
              }
      
      #ifdef MULTIPROCESSOR
      good:
      #endif
        if (ISSET(flags, PR_ZERO))
                memset(v, 0, pp->pr_size);
      
              return (v);
      
      fail:
              pp->pr_nfail++;
              pl_leave(pp, &pp->pr_lock);
              return (NULL);
      }
      
      void
      pool_get_done(struct pool *pp, void *xmem, void *v)
      {
              struct pool_get_memory *mem = xmem;
      
              pl_enter(pp, &mem->lock);
              mem->v = v;
              pl_leave(pp, &mem->lock);
      
              wakeup_one(mem);
      }
      
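/*
 * pool_runqueue() services queued pool_requests.  The pr_requesting
 * counter below lets the first caller drain the queue on behalf of
 * everyone: concurrent callers merely bump the counter and return,
 * while the draining caller loops until the counter falls back to
 * zero, picking up any requests queued while it was busy.
 */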
      void
      pool_runqueue(struct pool *pp, int flags)
      {
              struct pool_requests prl = TAILQ_HEAD_INITIALIZER(prl);
              struct pool_request *pr;
      
              pl_assert_unlocked(pp, &pp->pr_lock);
              pl_assert_locked(pp, &pp->pr_requests_lock);
      
              if (pp->pr_requesting++)
                      return;
      
              do {
                      pp->pr_requesting = 1;
      
                      /* no TAILQ_JOIN? :( */
                      while ((pr = TAILQ_FIRST(&pp->pr_requests)) != NULL) {
                              TAILQ_REMOVE(&pp->pr_requests, pr, pr_entry);
                              TAILQ_INSERT_TAIL(&prl, pr, pr_entry);
                      }
                      if (TAILQ_EMPTY(&prl))
                              continue;
      
                      pl_leave(pp, &pp->pr_requests_lock);
      
                      pl_enter(pp, &pp->pr_lock);
                      pr = TAILQ_FIRST(&prl);
                      while (pr != NULL) {
                              int slowdown = 0;
      
                              if (pp->pr_nout >= pp->pr_hardlimit)
                                      break;
      
                              pr->pr_item = pool_do_get(pp, flags, &slowdown);
                              if (pr->pr_item == NULL) /* || slowdown ? */
                                      break;
      
                              pr = TAILQ_NEXT(pr, pr_entry);
                      }
                      pl_leave(pp, &pp->pr_lock);
      
                      while ((pr = TAILQ_FIRST(&prl)) != NULL &&
                          pr->pr_item != NULL) {
                              TAILQ_REMOVE(&prl, pr, pr_entry);
                              (*pr->pr_handler)(pp, pr->pr_cookie, pr->pr_item);
                      }
      
                      pl_enter(pp, &pp->pr_requests_lock);
              } while (--pp->pr_requesting);
      
              /* no TAILQ_JOIN :( */
              while ((pr = TAILQ_FIRST(&prl)) != NULL) {
                      TAILQ_REMOVE(&prl, pr, pr_entry);
                      TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
              }
      }
      
      void *
      pool_do_get(struct pool *pp, int flags, int *slowdown)
{
              struct pool_item *pi;
              struct pool_page_header *ph;
      
              pl_assert_locked(pp, &pp->pr_lock);
      
        splassert(pp->pr_ipl);
      
              /*
               * Account for this item now to avoid races if we need to give up
               * pr_lock to allocate a page.
               */
              pp->pr_nout++;
      
        if (pp->pr_curpage == NULL) {
                      pl_leave(pp, &pp->pr_lock);
                      ph = pool_p_alloc(pp, flags, slowdown);
                      pl_enter(pp, &pp->pr_lock);
      
                      if (ph == NULL) {
                              pp->pr_nout--;
                              return (NULL);
                      }
      
                pool_p_insert(pp, ph);
              }
      
              ph = pp->pr_curpage;
              pi = XSIMPLEQ_FIRST(&ph->ph_items);
              if (__predict_false(pi == NULL))
                      panic("%s: %s: page empty", __func__, pp->pr_wchan);
      
              if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
                      panic("%s: %s free list modified: "
                          "page %p; item addr %p; offset 0x%x=0x%lx != 0x%lx",
                          __func__, pp->pr_wchan, ph->ph_page, pi,
                          0, pi->pi_magic, POOL_IMAGIC(ph, pi));
              }
      
        XSIMPLEQ_REMOVE_HEAD(&ph->ph_items, pi_list);
      
      #ifdef DIAGNOSTIC
              if (pool_debug && POOL_PHPOISON(ph)) {
                      size_t pidx;
                      uint32_t pval;
                      if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
                          &pidx, &pval)) {
                              int *ip = (int *)(pi + 1);
                              panic("%s: %s free list modified: "
                                  "page %p; item addr %p; offset 0x%zx=0x%x",
                                  __func__, pp->pr_wchan, ph->ph_page, pi,
                                  (pidx * sizeof(int)) + sizeof(*pi), ip[pidx]);
                      }
        }
      #endif /* DIAGNOSTIC */
      
        if (ph->ph_nmissing++ == 0) {
                      /*
                       * This page was previously empty.  Move it to the list of
                       * partially-full pages.  This page is already curpage.
                       */
                TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);
                      TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);
      
                      pp->pr_nidle--;
              }
      
        if (ph->ph_nmissing == pp->pr_itemsperpage) {
                      /*
                       * This page is now full.  Move it to the full list
                       * and select a new current page.
                       */
                TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
                TAILQ_INSERT_TAIL(&pp->pr_fullpages, ph, ph_entry);
                pool_update_curpage(pp);
              }
      
              pp->pr_nget++;
      
              return (pi);
      }
      
      /*
       * Return resource to the pool.
       */
      void
      pool_put(struct pool *pp, void *v)
{
              struct pool_page_header *ph, *freeph = NULL;
      
      #ifdef DIAGNOSTIC
              if (v == NULL)
                      panic("%s: NULL item", __func__);
      #endif
      
      #ifdef MULTIPROCESSOR
              if (pp->pr_cache != NULL && TAILQ_EMPTY(&pp->pr_requests)) {
                      pool_cache_put(pp, v);
                      return;
              }
      #endif
      
              pl_enter(pp, &pp->pr_lock);
      
              pool_do_put(pp, v);
      
              pp->pr_nout--;
              pp->pr_nput++;
      
              /* is it time to free a page? */
        if (pp->pr_nidle > pp->pr_maxpages &&
                  (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
                  (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
                      freeph = ph;
                pool_p_remove(pp, freeph);
              }
      
              pl_leave(pp, &pp->pr_lock);
      
        if (freeph != NULL)
                pool_p_free(pp, freeph);

        pool_wakeup(pp);
      }
      
      void
      pool_wakeup(struct pool *pp)
      {
        if (!TAILQ_EMPTY(&pp->pr_requests)) {
                      pl_enter(pp, &pp->pr_requests_lock);
                      pool_runqueue(pp, PR_NOWAIT);
                      pl_leave(pp, &pp->pr_requests_lock);
              }
      }
      
      void
      pool_do_put(struct pool *pp, void *v)
{
              struct pool_item *pi = v;
              struct pool_page_header *ph;
      
        splassert(pp->pr_ipl);

        ph = pr_find_pagehead(pp, v);
      
      #ifdef DIAGNOSTIC
              if (pool_debug) {
                      struct pool_item *qi;
                XSIMPLEQ_FOREACH(qi, &ph->ph_items, pi_list) {
                              if (pi == qi) {
                                      panic("%s: %s: double pool_put: %p", __func__,
                                          pp->pr_wchan, pi);
                              }
                      }
              }
      #endif /* DIAGNOSTIC */
      
              pi->pi_magic = POOL_IMAGIC(ph, pi);
        XSIMPLEQ_INSERT_HEAD(&ph->ph_items, pi, pi_list);
#ifdef DIAGNOSTIC
        if (POOL_PHPOISON(ph))
                poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
      #endif /* DIAGNOSTIC */
      
        if (ph->ph_nmissing-- == pp->pr_itemsperpage) {
                      /*
                       * The page was previously completely full, move it to the
                       * partially-full list.
                       */
                TAILQ_REMOVE(&pp->pr_fullpages, ph, ph_entry);
                      TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);
              }
      
        if (ph->ph_nmissing == 0) {
                      /*
                       * The page is now empty, so move it to the empty page list.
                       */
                      pp->pr_nidle++;
      
                      ph->ph_tick = ticks;
                TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
                TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry);
                pool_update_curpage(pp);
              }
      }
      
      /*
       * Add N items to the pool.
       */
      int
      pool_prime(struct pool *pp, int n)
{
              struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
              struct pool_page_header *ph;
              int newpages;
      
              newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
      
        while (newpages-- > 0) {
                      int slowdown = 0;
      
                      ph = pool_p_alloc(pp, PR_NOWAIT, &slowdown);
                      if (ph == NULL) /* or slowdown? */
                              break;
      
                      TAILQ_INSERT_TAIL(&pl, ph, ph_entry);
              }
      
              pl_enter(pp, &pp->pr_lock);
        while ((ph = TAILQ_FIRST(&pl)) != NULL) {
                TAILQ_REMOVE(&pl, ph, ph_entry);
                pool_p_insert(pp, ph);
              }
              pl_leave(pp, &pp->pr_lock);
      
              return (0);
      }
      
      struct pool_page_header *
      pool_p_alloc(struct pool *pp, int flags, int *slowdown)
{
        struct pool_page_header *ph;
        struct pool_item *pi;
        caddr_t addr;
              unsigned int order;
              int o;
              int n;
      
              pl_assert_unlocked(pp, &pp->pr_lock);
              KASSERT(pp->pr_size >= sizeof(*pi));
      
              addr = pool_allocator_alloc(pp, flags, slowdown);
              if (addr == NULL)
                      return (NULL);
      
              if (POOL_INPGHDR(pp))
                      ph = (struct pool_page_header *)(addr + pp->pr_phoffset);
              else {
                      ph = pool_get(&phpool, flags);
                if (ph == NULL) {
                              pool_allocator_free(pp, addr);
                              return (NULL);
                      }
              }
      
              XSIMPLEQ_INIT(&ph->ph_items);
              ph->ph_page = addr;
              addr += pp->pr_align * (pp->pr_npagealloc % pp->pr_maxcolors);
              ph->ph_colored = addr;
              ph->ph_nmissing = 0;
              arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic));
      #ifdef DIAGNOSTIC
              /* use a bit in ph_magic to record if we poison page items */
              if (pool_debug)
                      SET(ph->ph_magic, POOL_MAGICBIT);
              else
                      CLR(ph->ph_magic, POOL_MAGICBIT);
      #endif /* DIAGNOSTIC */
      
              n = pp->pr_itemsperpage;
              o = 32;
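
        /*
         * Link every item on the page into the free list, inserting at
         * the head or the tail according to one fresh random bit per
         * item (drawn 32 bits at a time), so the order in which items
         * are later handed out is not predictable.
         */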
        while (n--) {
                pi = (struct pool_item *)addr;
                pi->pi_magic = POOL_IMAGIC(ph, pi);

                if (o == 32) {
                        order = arc4random();
                        o = 0;
                }
                if (ISSET(order, 1 << o++))
                        XSIMPLEQ_INSERT_TAIL(&ph->ph_items, pi, pi_list);
                else
                        XSIMPLEQ_INSERT_HEAD(&ph->ph_items, pi, pi_list);

#ifdef DIAGNOSTIC
                if (POOL_PHPOISON(ph))
                        poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
#endif /* DIAGNOSTIC */

                addr += pp->pr_size;
              }
      
              return (ph);
      }
      
      void
      pool_p_free(struct pool *pp, struct pool_page_header *ph)
{
              struct pool_item *pi;
      
              pl_assert_unlocked(pp, &pp->pr_lock);
              KASSERT(ph->ph_nmissing == 0);
      
        XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
                      if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
                              panic("%s: %s free list modified: "
                                  "page %p; item addr %p; offset 0x%x=0x%lx",
                                  __func__, pp->pr_wchan, ph->ph_page, pi,
                                  0, pi->pi_magic);
                      }
      
      #ifdef DIAGNOSTIC
                      if (POOL_PHPOISON(ph)) {
                              size_t pidx;
                              uint32_t pval;
                              if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
                                  &pidx, &pval)) {
                                      int *ip = (int *)(pi + 1);
                                      panic("%s: %s free list modified: "
                                          "page %p; item addr %p; offset 0x%zx=0x%x",
                                          __func__, pp->pr_wchan, ph->ph_page, pi,
                                          pidx * sizeof(int), ip[pidx]);
                              }
                }
      #endif
              }
      
              pool_allocator_free(pp, ph->ph_page);
      
        if (!POOL_INPGHDR(pp))
                pool_put(&phpool, ph);
      }
      
      void
      pool_p_insert(struct pool *pp, struct pool_page_header *ph)
      {
              pl_assert_locked(pp, &pp->pr_lock);
      
              /* If the pool was depleted, point at the new page */
              if (pp->pr_curpage == NULL)
                pp->pr_curpage = ph;

        TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry);
        if (!POOL_INPGHDR(pp))
                RBT_INSERT(phtree, &pp->pr_phtree, ph);
      
              pp->pr_nitems += pp->pr_itemsperpage;
              pp->pr_nidle++;
      
              pp->pr_npagealloc++;
        if (++pp->pr_npages > pp->pr_hiwat)
                pp->pr_hiwat = pp->pr_npages;
      }
      
      void
      pool_p_remove(struct pool *pp, struct pool_page_header *ph)
      {
              pl_assert_locked(pp, &pp->pr_lock);
      
              pp->pr_npagefree++;
              pp->pr_npages--;
              pp->pr_nidle--;
              pp->pr_nitems -= pp->pr_itemsperpage;
      
        if (!POOL_INPGHDR(pp))
                RBT_REMOVE(phtree, &pp->pr_phtree, ph);
        TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);

        pool_update_curpage(pp);
      }
      
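/*
 * Select the page that pool_do_get() serves items from: prefer a
 * partially-full page, and fall back to an empty one only when none
 * exists.  Draining pages already in use first leaves empty pages idle
 * so they can age out and be freed or reclaimed.
 */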
      void
      pool_update_curpage(struct pool *pp)
      {
              pp->pr_curpage = TAILQ_LAST(&pp->pr_partpages, pool_pagelist);
        if (pp->pr_curpage == NULL) {
                pp->pr_curpage = TAILQ_LAST(&pp->pr_emptypages, pool_pagelist);
              }
      }
      
      void
      pool_setlowat(struct pool *pp, int n)
      {
              int prime = 0;
      
              pl_enter(pp, &pp->pr_lock);
              pp->pr_minitems = n;
              pp->pr_minpages = (n == 0)
                      ? 0
                      : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
      
              if (pp->pr_nitems < n)
                      prime = n - pp->pr_nitems;
              pl_leave(pp, &pp->pr_lock);
      
              if (prime > 0)
                      pool_prime(pp, prime);
      }
      
      void
      pool_sethiwat(struct pool *pp, int n)
      {
              pp->pr_maxpages = (n == 0)
                      ? 0
                      : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
      }
      
      int
      pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
      {
              int error = 0;
      
              if (n < pp->pr_nout) {
                      error = EINVAL;
                      goto done;
              }
      
              pp->pr_hardlimit = n;
              pp->pr_hardlimit_warning = warnmsg;
              pp->pr_hardlimit_ratecap.tv_sec = ratecap;
              pp->pr_hardlimit_warning_last.tv_sec = 0;
              pp->pr_hardlimit_warning_last.tv_usec = 0;
      
      done:
              return (error);
      }
      
      void
      pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode)
      {
              pp->pr_crange = mode;
      }
      
      /*
       * Release all complete pages that have not been used recently.
       *
       * Returns non-zero if any pages have been reclaimed.
       */
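/*
 * Note that candidate pages are unlinked while pr_lock is held but are
 * handed back to the backend allocator only after the lock is dropped,
 * presumably so the allocator is never entered with a pool lock held.
 */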
      int
      pool_reclaim(struct pool *pp)
      {
              struct pool_page_header *ph, *phnext;
              struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
      
              pl_enter(pp, &pp->pr_lock);
              for (ph = TAILQ_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
                      phnext = TAILQ_NEXT(ph, ph_entry);
      
                      /* Check our minimum page claim */
                      if (pp->pr_npages <= pp->pr_minpages)
                              break;
      
                      /*
                       * If freeing this page would put us below
                       * the low water mark, stop now.
                       */
                      if ((pp->pr_nitems - pp->pr_itemsperpage) <
                          pp->pr_minitems)
                              break;
      
                      pool_p_remove(pp, ph);
                      TAILQ_INSERT_TAIL(&pl, ph, ph_entry);
              }
              pl_leave(pp, &pp->pr_lock);
      
              if (TAILQ_EMPTY(&pl))
                      return (0);
      
              while ((ph = TAILQ_FIRST(&pl)) != NULL) {
                      TAILQ_REMOVE(&pl, ph, ph_entry);
                      pool_p_free(pp, ph);
              }
      
              return (1);
      }
      
      /*
       * Release all complete pages that have not been used recently
       * from all pools.
       */
      void
      pool_reclaim_all(void)
      {
              struct pool        *pp;
      
              rw_enter_read(&pool_lock);
              SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist)
                      pool_reclaim(pp);
              rw_exit_read(&pool_lock);
      }
      
      #ifdef DDB
      #include <machine/db_machdep.h>
      #include <ddb/db_output.h>
      
      /*
       * Diagnostic helpers.
       */
      void
      pool_printit(struct pool *pp, const char *modif,
          int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
      {
              pool_print1(pp, modif, pr);
      }
      
      void
      pool_print_pagelist(struct pool_pagelist *pl,
          int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
      {
              struct pool_page_header *ph;
              struct pool_item *pi;
      
              TAILQ_FOREACH(ph, pl, ph_entry) {
                      (*pr)("\t\tpage %p, color %p, nmissing %d\n",
                          ph->ph_page, ph->ph_colored, ph->ph_nmissing);
                      XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
                              if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
                                      (*pr)("\t\t\titem %p, magic 0x%lx\n",
                                          pi, pi->pi_magic);
                              }
                      }
              }
      }
      
      void
      pool_print1(struct pool *pp, const char *modif,
          int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
      {
              struct pool_page_header *ph;
              int print_pagelist = 0;
              char c;
      
        while ((c = *modif++) != '\0') {
                if (c == 'p')
                        print_pagelist = 1;
        }
      
              (*pr)("POOL %s: size %u maxcolors %u\n", pp->pr_wchan, pp->pr_size,
                  pp->pr_maxcolors);
              (*pr)("\talloc %p\n", pp->pr_alloc);
              (*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
                  pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
              (*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
                  pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);
      
              (*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
                  pp->pr_nget, pp->pr_nfail, pp->pr_nput);
              (*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
                  pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);
      
              if (print_pagelist == 0)
                      return;
      
              if ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL)
                      (*pr)("\n\tempty page list:\n");
              pool_print_pagelist(&pp->pr_emptypages, pr);
              if ((ph = TAILQ_FIRST(&pp->pr_fullpages)) != NULL)
                      (*pr)("\n\tfull page list:\n");
              pool_print_pagelist(&pp->pr_fullpages, pr);
              if ((ph = TAILQ_FIRST(&pp->pr_partpages)) != NULL)
                      (*pr)("\n\tpartial-page list:\n");
              pool_print_pagelist(&pp->pr_partpages, pr);
      
              if (pp->pr_curpage == NULL)
                      (*pr)("\tno current page\n");
              else
                      (*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
      }
      
      void
      db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
      {
              struct pool *pp;
              char maxp[16];
              int ovflw;
              char mode;
      
              mode = modif[0];
              if (mode != '\0' && mode != 'a') {
                      db_printf("usage: show all pools [/a]\n");
                      return;
              }
      
              if (mode == '\0')
                      db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
                          "Name",
                          "Size",
                          "Requests",
                          "Fail",
                          "Releases",
                          "Pgreq",
                          "Pgrel",
                          "Npage",
                          "Hiwat",
                          "Minpg",
                          "Maxpg",
                          "Idle");
              else
                      db_printf("%-12s %18s %18s\n",
                          "Name", "Address", "Allocator");
      
              SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
                      if (mode == 'a') {
                              db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp,
                                  pp->pr_alloc);
                              continue;
                      }
      
                      if (!pp->pr_nget)
                              continue;
      
                      if (pp->pr_maxpages == UINT_MAX)
                              snprintf(maxp, sizeof maxp, "inf");
                      else
                              snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);
      
      #define PRWORD(ovflw, fmt, width, fixed, val) do {        \
              (ovflw) += db_printf((fmt),                        \
                  (width) - (fixed) - (ovflw) > 0 ?                \
                  (width) - (fixed) - (ovflw) : 0,                \
                  (val)) - (width);                                \
              if ((ovflw) < 0)                                \
                      (ovflw) = 0;                                \
      } while (/* CONSTCOND */0)
      
                      ovflw = 0;
                      PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
                      PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
                      PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
                      PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
                      PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
                      PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
                      PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
                      PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
                      PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
                      PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
                      PRWORD(ovflw, " %*s", 6, 1, maxp);
                      PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);
      
                      pool_chk(pp);
              }
      }
      #endif /* DDB */
      
      #if defined(POOL_DEBUG) || defined(DDB)
      int
      pool_chk_page(struct pool *pp, struct pool_page_header *ph, int expected)
      {
              struct pool_item *pi;
              caddr_t page;
              int n;
              const char *label = pp->pr_wchan;
      
              page = (caddr_t)((u_long)ph & pp->pr_pgmask);
              if (page != ph->ph_page && POOL_INPGHDR(pp)) {
                      printf("%s: ", label);
                      printf("pool(%p:%s): page inconsistency: page %p; "
                          "at page head addr %p (p %p)\n",
                          pp, pp->pr_wchan, ph->ph_page, ph, page);
                      return 1;
              }
      
              for (pi = XSIMPLEQ_FIRST(&ph->ph_items), n = 0;
                   pi != NULL;
                   pi = XSIMPLEQ_NEXT(&ph->ph_items, pi, pi_list), n++) {
                      if ((caddr_t)pi < ph->ph_page ||
                          (caddr_t)pi >= ph->ph_page + pp->pr_pgsize) {
                              printf("%s: ", label);
                              printf("pool(%p:%s): page inconsistency: page %p;"
                                  " item ordinal %d; addr %p\n", pp,
                                  pp->pr_wchan, ph->ph_page, n, pi);
                              return (1);
                      }
      
                      if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
                              printf("%s: ", label);
                              printf("pool(%p:%s): free list modified: "
                                  "page %p; item ordinal %d; addr %p "
                                  "(p %p); offset 0x%x=0x%lx\n",
                                  pp, pp->pr_wchan, ph->ph_page, n, pi, page,
                                  0, pi->pi_magic);
                      }
      
      #ifdef DIAGNOSTIC
                      if (POOL_PHPOISON(ph)) {
                              size_t pidx;
                              uint32_t pval;
                              if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
                                  &pidx, &pval)) {
                                      int *ip = (int *)(pi + 1);
                                      printf("pool(%s): free list modified: "
                                          "page %p; item ordinal %d; addr %p "
                                          "(p %p); offset 0x%zx=0x%x\n",
                                          pp->pr_wchan, ph->ph_page, n, pi,
                                          page, pidx * sizeof(int), ip[pidx]);
                              }
                      }
      #endif /* DIAGNOSTIC */
              }
              if (n + ph->ph_nmissing != pp->pr_itemsperpage) {
                      printf("pool(%p:%s): page inconsistency: page %p;"
                          " %d on list, %d missing, %d items per page\n", pp,
                          pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
                          pp->pr_itemsperpage);
                      return 1;
              }
              if (expected >= 0 && n != expected) {
                      printf("pool(%p:%s): page inconsistency: page %p;"
                          " %d on list, %d missing, %d expected\n", pp,
                          pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
                          expected);
                      return 1;
              }
              return 0;
      }
      
      int
      pool_chk(struct pool *pp)
      {
              struct pool_page_header *ph;
              int r = 0;
      
              TAILQ_FOREACH(ph, &pp->pr_emptypages, ph_entry)
                      r += pool_chk_page(pp, ph, pp->pr_itemsperpage);
              TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry)
                      r += pool_chk_page(pp, ph, 0);
              TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry)
                      r += pool_chk_page(pp, ph, -1);
      
              return (r);
      }
      #endif /* defined(POOL_DEBUG) || defined(DDB) */
      
      #ifdef DDB
      void
      pool_walk(struct pool *pp, int full,
          int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))),
          void (*func)(void *, int, int (*)(const char *, ...)
                  __attribute__((__format__(__kprintf__,1,2)))))
      {
              struct pool_page_header *ph;
              struct pool_item *pi;
              caddr_t cp;
              int n;
      
              TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry) {
                      cp = ph->ph_colored;
                      n = ph->ph_nmissing;
      
                      while (n--) {
                              func(cp, full, pr);
                              cp += pp->pr_size;
                      }
              }
      
              TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry) {
                      cp = ph->ph_colored;
                      n = ph->ph_nmissing;
      
                      do {
                              XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
                                      if (cp == (caddr_t)pi)
                                              break;
                              }
                              if (cp != (caddr_t)pi) {
                                      func(cp, full, pr);
                                      n--;
                              }
      
                              cp += pp->pr_size;
                      } while (n > 0);
              }
      }
      #endif
      
/*
 * We have the following sysctls:
 * kern.pool.npools - the number of pools.
 * kern.pool.pool.<pool#> - the pool struct for pool#.
 * kern.pool.name.<pool#> - the name of pool#.
 * KERN_POOL_CACHE and KERN_POOL_CACHE_CPUS additionally expose the
 * pool's cache statistics, globally and per CPU.
 */
      int
      sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp)
      {
              struct kinfo_pool pi;
              struct pool *pp;
              int rv = ENOENT;
      
              switch (name[0]) {
              case KERN_POOL_NPOOLS:
                      if (namelen != 1)
                              return (ENOTDIR);
                      return (sysctl_rdint(oldp, oldlenp, NULL, pool_count));
      
              case KERN_POOL_NAME:
              case KERN_POOL_POOL:
              case KERN_POOL_CACHE:
              case KERN_POOL_CACHE_CPUS:
                      break;
              default:
                      return (EOPNOTSUPP);
              }
      
              if (namelen != 2)
                      return (ENOTDIR);
      
              rw_enter_read(&pool_lock);
      
              SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
                      if (name[1] == pp->pr_serial)
                              break;
              }
      
              if (pp == NULL)
                      goto done;
      
              switch (name[0]) {
              case KERN_POOL_NAME:
                      rv = sysctl_rdstring(oldp, oldlenp, NULL, pp->pr_wchan);
                      break;
              case KERN_POOL_POOL:
                      memset(&pi, 0, sizeof(pi));
      
                      pl_enter(pp, &pp->pr_lock);
                      pi.pr_size = pp->pr_size;
                      pi.pr_pgsize = pp->pr_pgsize;
                      pi.pr_itemsperpage = pp->pr_itemsperpage;
                      pi.pr_npages = pp->pr_npages;
                      pi.pr_minpages = pp->pr_minpages;
                      pi.pr_maxpages = pp->pr_maxpages;
                      pi.pr_hardlimit = pp->pr_hardlimit;
                      pi.pr_nout = pp->pr_nout;
                      pi.pr_nitems = pp->pr_nitems;
                      pi.pr_nget = pp->pr_nget;
                      pi.pr_nput = pp->pr_nput;
                      pi.pr_nfail = pp->pr_nfail;
                      pi.pr_npagealloc = pp->pr_npagealloc;
                      pi.pr_npagefree = pp->pr_npagefree;
                      pi.pr_hiwat = pp->pr_hiwat;
                      pi.pr_nidle = pp->pr_nidle;
                      pl_leave(pp, &pp->pr_lock);
      
                      pool_cache_pool_info(pp, &pi);
      
                      rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi));
                      break;
      
              case KERN_POOL_CACHE:
                      rv = pool_cache_info(pp, oldp, oldlenp);
                      break;
      
              case KERN_POOL_CACHE_CPUS:
                      rv = pool_cache_cpus_info(pp, oldp, oldlenp);
                      break;
              }
      
      done:
              rw_exit_read(&pool_lock);
      
              return (rv);
      }
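
/*
 * A minimal userland sketch of this interface (needs <sys/sysctl.h>,
 * <sys/pool.h>, <stdio.h>, <err.h>); the CTL_KERN/KERN_POOL mib names
 * and the probed serial number are illustrative assumptions:
 *
 *	int mib[4] = { CTL_KERN, KERN_POOL, KERN_POOL_NPOOLS };
 *	int npools, serial = 1;		// hypothetical pool serial
 *	size_t len = sizeof(npools);
 *	struct kinfo_pool kp;
 *
 *	if (sysctl(mib, 3, &npools, &len, NULL, 0) == -1)
 *		err(1, "sysctl");
 *
 *	mib[2] = KERN_POOL_POOL;
 *	mib[3] = serial;
 *	len = sizeof(kp);
 *	if (sysctl(mib, 4, &kp, &len, NULL, 0) == 0)
 *		printf("%u of %u items in use\n", kp.pr_nout, kp.pr_nitems);
 */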
      
      void
      pool_gc_sched(void *null)
      {
              task_add(systqmp, &pool_gc_task);
      }
      
      void
      pool_gc_pages(void *null)
      {
              struct pool *pp;
              struct pool_page_header *ph, *freeph;
              int s;
      
              rw_enter_read(&pool_lock);
              s = splvm(); /* XXX go to splvm until all pools _setipl properly */
              SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
      #ifdef MULTIPROCESSOR
                      if (pp->pr_cache != NULL)
                              pool_cache_gc(pp);
      #endif
      
                      if (pp->pr_nidle <= pp->pr_minpages || /* guess */
                          !pl_enter_try(pp, &pp->pr_lock)) /* try */
                              continue;
      
                      /* is it time to free a page? */
                      if (pp->pr_nidle > pp->pr_minpages &&
                          (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
                          (ticks - ph->ph_tick) > (hz * pool_wait_gc)) {
                              freeph = ph;
                              pool_p_remove(pp, freeph);
                      } else
                              freeph = NULL;
      
                      pl_leave(pp, &pp->pr_lock);
      
                      if (freeph != NULL)
                              pool_p_free(pp, freeph);
              }
              splx(s);
              rw_exit_read(&pool_lock);
      
              timeout_add_sec(&pool_gc_tick, 1);
      }
      
      /*
       * Pool backend allocators.
       */
      
      void *
      pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
      {
              void *v;
      
              v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown);
      
      #ifdef DIAGNOSTIC
              if (v != NULL && POOL_INPGHDR(pp)) {
                      vaddr_t addr = (vaddr_t)v;
                      if ((addr & pp->pr_pgmask) != addr) {
                              panic("%s: %s page address %p isnt aligned to %u",
                                  __func__, pp->pr_wchan, v, pp->pr_pgsize);
                      }
              }
      #endif
      
              return (v);
      }
      
      void
      pool_allocator_free(struct pool *pp, void *v)
      {
              struct pool_allocator *pa = pp->pr_alloc;
      
              (*pa->pa_free)(pp, v);
      }
      
      void *
      pool_page_alloc(struct pool *pp, int flags, int *slowdown)
{
              struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
      
              kd.kd_waitok = ISSET(flags, PR_WAITOK);
              kd.kd_slowdown = slowdown;
      
              return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd));
      }
      
      void
      pool_page_free(struct pool *pp, void *v)
{
              km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange);
      }
      
      void *
      pool_multi_alloc(struct pool *pp, int flags, int *slowdown)
{
              struct kmem_va_mode kv = kv_intrsafe;
              struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
              void *v;
              int s;
      
        if (POOL_INPGHDR(pp))
                      kv.kv_align = pp->pr_pgsize;
      
              kd.kd_waitok = ISSET(flags, PR_WAITOK);
              kd.kd_slowdown = slowdown;
      
              s = splvm();
              v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
              splx(s);
      
              return (v);
      }
      
      void
      pool_multi_free(struct pool *pp, void *v)
{
              struct kmem_va_mode kv = kv_intrsafe;
              int s;
      
        if (POOL_INPGHDR(pp))
                      kv.kv_align = pp->pr_pgsize;
      
              s = splvm();
              km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
              splx(s);
      }
      
      void *
      pool_multi_alloc_ni(struct pool *pp, int flags, int *slowdown)
{
              struct kmem_va_mode kv = kv_any;
              struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
              void *v;
      
        if (POOL_INPGHDR(pp))
                      kv.kv_align = pp->pr_pgsize;
      
              kd.kd_waitok = ISSET(flags, PR_WAITOK);
              kd.kd_slowdown = slowdown;
      
              KERNEL_LOCK();
              v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
              KERNEL_UNLOCK();
      
              return (v);
      }
      
      void
      pool_multi_free_ni(struct pool *pp, void *v)
      {
              struct kmem_va_mode kv = kv_any;
      
              if (POOL_INPGHDR(pp))
                      kv.kv_align = pp->pr_pgsize;
      
              KERNEL_LOCK();
              km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
              KERNEL_UNLOCK();
      }
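
/*
 * A sketch of how one of these alloc/free pairs is packaged and handed
 * to pool_init(); the struct pool_allocator layout (pa_alloc, pa_free,
 * pa_pagesz) and the page size constant are assumptions for
 * illustration only:
 *
 *	const struct pool_allocator pool_allocator_multi = {
 *		pool_multi_alloc,
 *		pool_multi_free,
 *		POOL_LARGE_PGSZ,	// hypothetical page size constant
 *	};
 *
 *	pool_init(&mypool, sizeof(struct mything), 0, IPL_VM, 0,
 *	    "mythings", &pool_allocator_multi);
 */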
      
      #ifdef MULTIPROCESSOR
      
      struct pool pool_caches; /* per cpu cache entries */
      
      void
      pool_cache_init(struct pool *pp)
      {
              struct cpumem *cm;
              struct pool_cache *pc;
              struct cpumem_iter i;
      
              if (pool_caches.pr_size == 0) {
                      pool_init(&pool_caches, sizeof(struct pool_cache),
                          CACHELINESIZE, IPL_NONE, PR_WAITOK | PR_RWLOCK,
                          "plcache", NULL);
              }
      
              /* must be able to use the pool items as cache list items */
              KASSERT(pp->pr_size >= sizeof(struct pool_cache_item));
      
              cm = cpumem_get(&pool_caches);
      
              pl_init(pp, &pp->pr_cache_lock);
              arc4random_buf(pp->pr_cache_magic, sizeof(pp->pr_cache_magic));
              TAILQ_INIT(&pp->pr_cache_lists);
              pp->pr_cache_nitems = 0;
              pp->pr_cache_tick = ticks;
              pp->pr_cache_items = 8;
              pp->pr_cache_contention = 0;
              pp->pr_cache_ngc = 0;
      
              CPUMEM_FOREACH(pc, &i, cm) {
                      pc->pc_actv = NULL;
                      pc->pc_nactv = 0;
                      pc->pc_prev = NULL;
      
                      pc->pc_nget = 0;
                      pc->pc_nfail = 0;
                      pc->pc_nput = 0;
                      pc->pc_nlget = 0;
                      pc->pc_nlfail = 0;
                      pc->pc_nlput = 0;
                      pc->pc_nout = 0;
              }
      
              membar_producer();
      
              pp->pr_cache = cm;
      }
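
/*
 * Items on the per-CPU free lists double as the list nodes themselves.
 * To detect writes to supposedly free memory, the first two words of
 * each item are stored XORed with per-pool random magic: entry[0]
 * encodes the item's own address and entry[1] the ci_next pointer, so
 * pool_cache_item_magic_check() trips on any modification made while
 * the item sat on a free list.
 */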
      
      static inline void
      pool_cache_item_magic(struct pool *pp, struct pool_cache_item *ci)
      {
              unsigned long *entry = (unsigned long *)&ci->ci_nextl;
      
              entry[0] = pp->pr_cache_magic[0] ^ (u_long)ci;
              entry[1] = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next;
      }
      
      static inline void
      pool_cache_item_magic_check(struct pool *pp, struct pool_cache_item *ci)
      {
              unsigned long *entry;
              unsigned long val;
      
              entry = (unsigned long *)&ci->ci_nextl;
              val = pp->pr_cache_magic[0] ^ (u_long)ci;
              if (*entry != val)
                      goto fail;
      
              entry++;
              val = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next;
              if (*entry != val)
                      goto fail;
      
              return;
      
      fail:
              panic("%s: %s cpu free list modified: item addr %p+%zu 0x%lx!=0x%lx",
                  __func__, pp->pr_wchan, ci, (caddr_t)entry - (caddr_t)ci,
                  *entry, val);
      }
      
      static inline void
      pool_list_enter(struct pool *pp)
      {
              if (pl_enter_try(pp, &pp->pr_cache_lock) == 0) {
                      pl_enter(pp, &pp->pr_cache_lock);
                      pp->pr_cache_contention++;
              }
      }
      
      static inline void
      pool_list_leave(struct pool *pp)
      {
              pl_leave(pp, &pp->pr_cache_lock);
      }
      
      static inline struct pool_cache_item *
      pool_cache_list_alloc(struct pool *pp, struct pool_cache *pc)
      {
              struct pool_cache_item *pl;
      
              pool_list_enter(pp);
              pl = TAILQ_FIRST(&pp->pr_cache_lists);
              if (pl != NULL) {
                      TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl);
                      pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl);
      
                      pool_cache_item_magic(pp, pl);
      
                      pc->pc_nlget++;
              } else
                      pc->pc_nlfail++;
      
        /* fold this CPU's nout into the global count while we have the lock */
              pp->pr_cache_nout += pc->pc_nout;
              pc->pc_nout = 0;
              pool_list_leave(pp);
      
              return (pl);
      }
      
      static inline void
      pool_cache_list_free(struct pool *pp, struct pool_cache *pc,
          struct pool_cache_item *ci)
      {
              pool_list_enter(pp);
              if (TAILQ_EMPTY(&pp->pr_cache_lists))
                      pp->pr_cache_tick = ticks;
      
              pp->pr_cache_nitems += POOL_CACHE_ITEM_NITEMS(ci);
              TAILQ_INSERT_TAIL(&pp->pr_cache_lists, ci, ci_nextl);
      
              pc->pc_nlput++;
      
        /* fold this CPU's nout into the global count while we have the lock */
              pp->pr_cache_nout += pc->pc_nout;
              pc->pc_nout = 0;
              pool_list_leave(pp);
      }
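
/*
 * pc_gen acts as a seqlock-style generation counter: it is odd while
 * the owning CPU is between pool_cache_enter() and pool_cache_leave(),
 * and even otherwise.  Cross-CPU readers such as pool_cache_pool_info()
 * spin until the value is even, copy the counters, and retry if the
 * generation changed in the meantime.
 */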
      
      static inline struct pool_cache *
      pool_cache_enter(struct pool *pp, int *s)
      {
              struct pool_cache *pc;
      
              pc = cpumem_enter(pp->pr_cache);
              *s = splraise(pp->pr_ipl);
              pc->pc_gen++;
      
              return (pc);
      }
      
      static inline void
      pool_cache_leave(struct pool *pp, struct pool_cache *pc, int s)
      {
              pc->pc_gen++;
              splx(s);
              cpumem_leave(pp->pr_cache, pc);
      }
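
/*
 * Each CPU caches items on two intrusive lists: pc_actv, the list
 * currently being filled and drained, and pc_prev, the most recent
 * list to fill up.  Gets fall back from pc_actv to pc_prev to the
 * shared lists; a put that overflows pc_actv first pushes the old
 * pc_prev out to the shared lists.
 */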
      
      void *
      pool_cache_get(struct pool *pp)
      {
              struct pool_cache *pc;
              struct pool_cache_item *ci;
              int s;
      
              pc = pool_cache_enter(pp, &s);
      
              if (pc->pc_actv != NULL) {
                      ci = pc->pc_actv;
              } else if (pc->pc_prev != NULL) {
                      ci = pc->pc_prev;
                      pc->pc_prev = NULL;
              } else if ((ci = pool_cache_list_alloc(pp, pc)) == NULL) {
                      pc->pc_nfail++;
                      goto done;
              }
      
              pool_cache_item_magic_check(pp, ci);
      #ifdef DIAGNOSTIC
              if (pool_debug && POOL_CACHE_ITEM_POISONED(ci)) {
                      size_t pidx;
                      uint32_t pval;
      
                      if (poison_check(ci + 1, pp->pr_size - sizeof(*ci),
                          &pidx, &pval)) {
                              int *ip = (int *)(ci + 1);
                              ip += pidx;
      
                              panic("%s: %s cpu free list modified: "
                                  "item addr %p+%zu 0x%x!=0x%x",
                                  __func__, pp->pr_wchan, ci,
                                  (caddr_t)ip - (caddr_t)ci, *ip, pval);
                      }
              }
      #endif
      
              pc->pc_actv = ci->ci_next;
              pc->pc_nactv = POOL_CACHE_ITEM_NITEMS(ci) - 1;
              pc->pc_nget++;
              pc->pc_nout++;
      
      done:
              pool_cache_leave(pp, pc, s);
      
              return (ci);
      }
      
      void
      pool_cache_put(struct pool *pp, void *v)
      {
              struct pool_cache *pc;
              struct pool_cache_item *ci = v;
              unsigned long nitems;
              int s;
      #ifdef DIAGNOSTIC
              int poison = pool_debug && pp->pr_size > sizeof(*ci);
      
              if (poison)
                      poison_mem(ci + 1, pp->pr_size - sizeof(*ci));
      #endif
      
              pc = pool_cache_enter(pp, &s);
      
              nitems = pc->pc_nactv;
              if (nitems >= pp->pr_cache_items) {
                      if (pc->pc_prev != NULL)
                              pool_cache_list_free(pp, pc, pc->pc_prev);
      
                      pc->pc_prev = pc->pc_actv;
      
                      pc->pc_actv = NULL;
                      pc->pc_nactv = 0;
                      nitems = 0;
              }
      
              ci->ci_next = pc->pc_actv;
              ci->ci_nitems = ++nitems;
      #ifdef DIAGNOSTIC
              ci->ci_nitems |= poison ? POOL_CACHE_ITEM_NITEMS_POISON : 0;
      #endif
              pool_cache_item_magic(pp, ci);
      
              pc->pc_actv = ci;
              pc->pc_nactv = nitems;
      
              pc->pc_nput++;
              pc->pc_nout--;
      
              pool_cache_leave(pp, pc, s);
      }
      
      struct pool_cache_item *
      pool_cache_list_put(struct pool *pp, struct pool_cache_item *pl)
      {
              struct pool_cache_item *rpl, *next;
      
              if (pl == NULL)
                      return (NULL);
      
              rpl = TAILQ_NEXT(pl, ci_nextl);
      
              pl_enter(pp, &pp->pr_lock);
              do {
                      next = pl->ci_next;
                      pool_do_put(pp, pl);
                      pl = next;
              } while (pl != NULL);
              pl_leave(pp, &pp->pr_lock);
      
              return (rpl);
      }
      
      void
      pool_cache_destroy(struct pool *pp)
      {
              struct pool_cache *pc;
              struct pool_cache_item *pl;
              struct cpumem_iter i;
              struct cpumem *cm;
      
              rw_enter_write(&pool_lock); /* serialise with the gc */
              cm = pp->pr_cache;
              pp->pr_cache = NULL; /* make pool_put avoid the cache */
              rw_exit_write(&pool_lock);
      
              CPUMEM_FOREACH(pc, &i, cm) {
                      pool_cache_list_put(pp, pc->pc_actv);
                      pool_cache_list_put(pp, pc->pc_prev);
              }
      
              cpumem_put(&pool_caches, cm);
      
              pl = TAILQ_FIRST(&pp->pr_cache_lists);
              while (pl != NULL)
                      pl = pool_cache_list_put(pp, pl);
      }
      
      void
      pool_cache_gc(struct pool *pp)
      {
              unsigned int contention, delta;
      
              if ((ticks - pp->pr_cache_tick) > (hz * pool_wait_gc) &&
                  !TAILQ_EMPTY(&pp->pr_cache_lists) &&
                  pl_enter_try(pp, &pp->pr_cache_lock)) {
                      struct pool_cache_item *pl = NULL;
      
                      pl = TAILQ_FIRST(&pp->pr_cache_lists);
                      if (pl != NULL) {
                              TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl);
                              pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl);
                              pp->pr_cache_tick = ticks;
      
                              pp->pr_cache_ngc++;
                      }
      
                      pl_leave(pp, &pp->pr_cache_lock);
      
                      pool_cache_list_put(pp, pl);
              }
      
        /*
         * If there's a lot of contention on pr_cache_lock, consider
         * growing the length of the lists to reduce the need to access
         * the global pool.
         */
      
              contention = pp->pr_cache_contention;
              delta = contention - pp->pr_cache_contention_prev;
              if (delta > 8 /* magic */) {
                      if ((ncpusfound * 8 * 2) <= pp->pr_cache_nitems)
                              pp->pr_cache_items += 8;
              } else if (delta == 0) {
                      if (pp->pr_cache_items > 8)
                              pp->pr_cache_items--;
              }
              pp->pr_cache_contention_prev = contention;
      }
      
      void
      pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi)
      {
              struct pool_cache *pc;
              struct cpumem_iter i;
      
              if (pp->pr_cache == NULL)
                      return;
      
              /* loop through the caches twice to collect stats */
      
              /* once without the lock so we can yield while reading nget/nput */
              CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
                      uint64_t gen, nget, nput;
      
                      do {
                              while ((gen = pc->pc_gen) & 1)
                                      yield();
      
                              nget = pc->pc_nget;
                              nput = pc->pc_nput;
                      } while (gen != pc->pc_gen);
      
                      pi->pr_nget += nget;
                      pi->pr_nput += nput;
              }
      
        /* and once with the lock so we can get consistent nout values */
              pl_enter(pp, &pp->pr_cache_lock);
              CPUMEM_FOREACH(pc, &i, pp->pr_cache)
                      pi->pr_nout += pc->pc_nout;
      
              pi->pr_nout += pp->pr_cache_nout;
              pl_leave(pp, &pp->pr_cache_lock);
      }
      
      int
      pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp)
      {
              struct kinfo_pool_cache kpc;
      
              if (pp->pr_cache == NULL)
                      return (EOPNOTSUPP);
      
              memset(&kpc, 0, sizeof(kpc)); /* don't leak padding */
      
              pl_enter(pp, &pp->pr_cache_lock);
              kpc.pr_ngc = pp->pr_cache_ngc;
              kpc.pr_len = pp->pr_cache_items;
              kpc.pr_nitems = pp->pr_cache_nitems;
              kpc.pr_contention = pp->pr_cache_contention;
              pl_leave(pp, &pp->pr_cache_lock);
      
              return (sysctl_rdstruct(oldp, oldlenp, NULL, &kpc, sizeof(kpc)));
      }
      
      int
      pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp)
      {
              struct pool_cache *pc;
              struct kinfo_pool_cache_cpu *kpcc, *info;
              unsigned int cpu = 0;
              struct cpumem_iter i;
              int error = 0;
              size_t len;
      
              if (pp->pr_cache == NULL)
                      return (EOPNOTSUPP);
              if (*oldlenp % sizeof(*kpcc))
                      return (EINVAL);
      
              kpcc = mallocarray(ncpusfound, sizeof(*kpcc), M_TEMP,
                  M_WAITOK|M_CANFAIL|M_ZERO);
              if (kpcc == NULL)
                      return (EIO);
      
              len = ncpusfound * sizeof(*kpcc);
      
              CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
                      uint64_t gen;
      
                      if (cpu >= ncpusfound) {
                              error = EIO;
                              goto err;
                      }
      
                      info = &kpcc[cpu];
                      info->pr_cpu = cpu;
      
                      do {
                              while ((gen = pc->pc_gen) & 1)
                                      yield();
      
                              info->pr_nget = pc->pc_nget;
                              info->pr_nfail = pc->pc_nfail;
                              info->pr_nput = pc->pc_nput;
                              info->pr_nlget = pc->pc_nlget;
                              info->pr_nlfail = pc->pc_nlfail;
                              info->pr_nlput = pc->pc_nlput;
                      } while (gen != pc->pc_gen);
      
                      cpu++;
              }
      
              error = sysctl_rdstruct(oldp, oldlenp, NULL, kpcc, len);
      err:
              free(kpcc, M_TEMP, len);
      
              return (error);
      }
      #else /* MULTIPROCESSOR */
      void
      pool_cache_init(struct pool *pp)
      {
              /* nop */
      }
      
      void
      pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi)
      {
              /* nop */
      }
      
      int
      pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp)
      {
              return (EOPNOTSUPP);
      }
      
      int
      pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp)
      {
              return (EOPNOTSUPP);
      }
      #endif /* MULTIPROCESSOR */
      
      
      void
      pool_lock_mtx_init(struct pool *pp, union pool_lock *lock,
          const struct lock_type *type)
{
              _mtx_init_flags(&lock->prl_mtx, pp->pr_ipl, pp->pr_wchan, 0, type);
      }
      
      void
      pool_lock_mtx_enter(union pool_lock *lock)
{
              mtx_enter(&lock->prl_mtx);
      }
      
      int
      pool_lock_mtx_enter_try(union pool_lock *lock)
      {
              return (mtx_enter_try(&lock->prl_mtx));
      }
      
      void
      pool_lock_mtx_leave(union pool_lock *lock)
{
              mtx_leave(&lock->prl_mtx);
      }
      
      void
      pool_lock_mtx_assert_locked(union pool_lock *lock)
{
        MUTEX_ASSERT_LOCKED(&lock->prl_mtx);
}
      
      void
      pool_lock_mtx_assert_unlocked(union pool_lock *lock)
{
        MUTEX_ASSERT_UNLOCKED(&lock->prl_mtx);
}
      
      int
      pool_lock_mtx_sleep(void *ident, union pool_lock *lock, int priority,
          const char *wmesg, int timo)
      {
              return msleep(ident, &lock->prl_mtx, priority, wmesg, timo);
      }
      
      static const struct pool_lock_ops pool_lock_ops_mtx = {
              pool_lock_mtx_init,
              pool_lock_mtx_enter,
              pool_lock_mtx_enter_try,
              pool_lock_mtx_leave,
              pool_lock_mtx_assert_locked,
              pool_lock_mtx_assert_unlocked,
              pool_lock_mtx_sleep,
      };
      
      void
      pool_lock_rw_init(struct pool *pp, union pool_lock *lock,
          const struct lock_type *type)
      {
              _rw_init_flags(&lock->prl_rwlock, pp->pr_wchan, 0, type);
      }
      
      void
      pool_lock_rw_enter(union pool_lock *lock)
{
              rw_enter_write(&lock->prl_rwlock);
      }
      
      int
      pool_lock_rw_enter_try(union pool_lock *lock)
      {
              return (rw_enter(&lock->prl_rwlock, RW_WRITE | RW_NOSLEEP) == 0);
      }
      
      void
      pool_lock_rw_leave(union pool_lock *lock)
{
              rw_exit_write(&lock->prl_rwlock);
      }
      
      void
      pool_lock_rw_assert_locked(union pool_lock *lock)
{
              rw_assert_wrlock(&lock->prl_rwlock);
      }
      
      void
      pool_lock_rw_assert_unlocked(union pool_lock *lock)
{
        KASSERT(rw_status(&lock->prl_rwlock) != RW_WRITE);
}
      
      int
      pool_lock_rw_sleep(void *ident, union pool_lock *lock, int priority,
          const char *wmesg, int timo)
      {
              return rwsleep(ident, &lock->prl_rwlock, priority, wmesg, timo);
      }
      
      static const struct pool_lock_ops pool_lock_ops_rw = {
              pool_lock_rw_init,
              pool_lock_rw_enter,
              pool_lock_rw_enter_try,
              pool_lock_rw_leave,
              pool_lock_rw_assert_locked,
              pool_lock_rw_assert_unlocked,
              pool_lock_rw_sleep,
      };
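
/*
 * pool_init() presumably selects pool_lock_ops_rw when PR_RWLOCK is
 * passed (as pool_cache_init() above does for the plcache pool) and
 * pool_lock_ops_mtx otherwise; the pl_*() wrappers used throughout
 * this file dispatch through the chosen ops table.
 */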
      /*        $OpenBSD: virtio.c,v 1.19 2019/05/26 15:20:04 sf Exp $        */
      /*        $NetBSD: virtio.c,v 1.3 2011/11/02 23:05:52 njoly Exp $        */
      
      /*
       * Copyright (c) 2012 Stefan Fritsch, Alexander Fiveg.
       * Copyright (c) 2010 Minoura Makoto.
       * All rights reserved.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       *
       * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
       * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
       * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
       * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
       * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
       * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
       * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
       * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
       * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
       * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
       */
      
      #include <sys/param.h>
      #include <sys/systm.h>
      #include <sys/kernel.h>
      #include <sys/device.h>
      #include <sys/mutex.h>
      #include <sys/atomic.h>
      #include <sys/malloc.h>
      
      #include <dev/pv/virtioreg.h>
      #include <dev/pv/virtiovar.h>
      
      #if VIRTIO_DEBUG
      #define VIRTIO_ASSERT(x)        KASSERT(x)
      #else
      #define VIRTIO_ASSERT(x)
      #endif
      
      void                 virtio_init_vq(struct virtio_softc *,
                                      struct virtqueue *);
      void                 vq_free_entry(struct virtqueue *, struct vq_entry *);
      struct vq_entry        *vq_alloc_entry(struct virtqueue *);
      
      struct cfdriver virtio_cd = {
              NULL, "virtio", DV_DULL
      };
      
      static const char * const virtio_device_name[] = {
              "Unknown (0)",                /* 0 */
              "Network",                /* 1 */
              "Block",                /* 2 */
              "Console",                /* 3 */
              "Entropy",                /* 4 */
              "Memory Balloon",        /* 5 */
              "IO Memory",                /* 6 */
              "Rpmsg",                /* 7 */
              "SCSI host",                /* 8 */
              "9P Transport",                /* 9 */
              "mac80211 wlan"                /* 10 */
      };
      #define NDEVNAMES        (sizeof(virtio_device_name)/sizeof(char*))
      
      const char *
      virtio_device_string(int id)
      {
              return id < NDEVNAMES ? virtio_device_name[id] : "Unknown";
      }
      
      #if VIRTIO_DEBUG
      static const struct virtio_feature_name transport_feature_names[] = {
              { VIRTIO_F_NOTIFY_ON_EMPTY,        "NotifyOnEmpty"},
              { VIRTIO_F_RING_INDIRECT_DESC,        "RingIndirectDesc"},
              { VIRTIO_F_RING_EVENT_IDX,        "RingEventIdx"},
              { VIRTIO_F_BAD_FEATURE,                "BadFeature"},
              { VIRTIO_F_VERSION_1,                "Version1"},
              { 0,                                NULL}
      };
      
      void
      virtio_log_features(uint64_t host, uint64_t neg,
          const struct virtio_feature_name *guest_feature_names)
      {
              const struct virtio_feature_name *namep;
              int i;
              char c;
        uint64_t bit;
      
              for (i = 0; i < 64; i++) {
                      if (i == 30) {
                              /*
                               * VIRTIO_F_BAD_FEATURE is only used for
                               * checking correct negotiation
                               */
                              continue;
                      }
                bit = 1ULL << i;
                      if ((host&bit) == 0)
                              continue;
                      namep = (i < 24 || i > 37) ? guest_feature_names :
                          transport_feature_names;
                      while (namep->bit && namep->bit != bit)
                              namep++;
                      c = (neg&bit) ? '+' : '-';
                      if (namep->name)
                              printf(" %c%s", c, namep->name);
                      else
                              printf(" %cUnknown(%d)", c, i);
              }
      }
      #endif
      
/*
 * Reset the device.
 *
 * To reset the device to a known state, do the following:
 *        virtio_reset(sc);            // this will stop the device activity
 *        <dequeue finished requests>; // virtio_dequeue() can still be called
 *        <revoke pending requests in the vqs if any>;
 *        virtio_reinit_start(sc);     // dequeue prohibited
 *        <some other initialization>;
 *        virtio_reinit_end(sc);       // device activated; enqueue allowed
 * Once attached, features are assumed not to change again.
 */
      void
      virtio_reset(struct virtio_softc *sc)
      {
              virtio_device_reset(sc);
              sc->sc_active_features = 0;
      }
      
      void
      virtio_reinit_start(struct virtio_softc *sc)
      {
              int i;
      
              virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
              virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);
              virtio_negotiate_features(sc, NULL);
              for (i = 0; i < sc->sc_nvqs; i++) {
                      int n;
                      struct virtqueue *vq = &sc->sc_vqs[i];
                      n = virtio_read_queue_size(sc, vq->vq_index);
                      if (n == 0)        /* vq disappeared */
                              continue;
                      if (n != vq->vq_num) {
                              panic("%s: virtqueue size changed, vq index %d\n",
                                  sc->sc_dev.dv_xname, vq->vq_index);
                      }
                      virtio_init_vq(sc, vq);
                      virtio_setup_queue(sc, vq, vq->vq_dmamap->dm_segs[0].ds_addr);
              }
      }
      
      void
      virtio_reinit_end(struct virtio_softc *sc)
      {
              virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
      }
      
      /*
       * dmamap sync operations for a virtqueue.
       */
      static inline void
      vq_sync_descs(struct virtio_softc *sc, struct virtqueue *vq, int ops)
      {
              /* availoffset == sizeof(vring_desc)*vq_num */
              bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap, 0, vq->vq_availoffset,
                  ops);
      }
      
      static inline void
      vq_sync_aring(struct virtio_softc *sc, struct virtqueue *vq, int ops)
      {
              bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap, vq->vq_availoffset,
                  offsetof(struct vring_avail, ring) + vq->vq_num * sizeof(uint16_t),
                  ops);
      }
      
      static inline void
      vq_sync_uring(struct virtio_softc *sc, struct virtqueue *vq, int ops)
      {
              bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap, vq->vq_usedoffset,
                  offsetof(struct vring_used, ring) + vq->vq_num *
                  sizeof(struct vring_used_elem), ops);
      }
      
      static inline void
      vq_sync_indirect(struct virtio_softc *sc, struct virtqueue *vq, int slot,
          int ops)
      {
              int offset = vq->vq_indirectoffset +
                  sizeof(struct vring_desc) * vq->vq_maxnsegs * slot;
      
              bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap, offset,
                  sizeof(struct vring_desc) * vq->vq_maxnsegs, ops);
      }
      
/*
 * Scan the vqs, bus_dmamap_sync their rings (not the payload),
 * and call (*vq_done)() for each vq with newly consumed entries.
 * For use in transport-specific interrupt handlers.
 */
      int
      virtio_check_vqs(struct virtio_softc *sc)
      {
              struct virtqueue *vq;
              int i, r = 0;
      
              /* going backwards is better for if_vio */
              for (i = sc->sc_nvqs - 1; i >= 0; i--) {
                      vq = &sc->sc_vqs[i];
                      if (vq->vq_queued) {
                              vq->vq_queued = 0;
                              vq_sync_aring(sc, vq, BUS_DMASYNC_POSTWRITE);
                      }
                      vq_sync_uring(sc, vq, BUS_DMASYNC_POSTREAD);
                      if (vq->vq_used_idx != vq->vq_used->idx) {
                              if (vq->vq_done)
                                      r |= (vq->vq_done)(vq);
                      }
              }
      
              return r;
      }
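
/*
 * A sketch of a driver's (*vq_done)() handler under this contract; the
 * virtio_dequeue()/virtio_dequeue_commit() signatures are assumed here
 * for illustration:
 *
 *	int
 *	mydev_vq_done(struct virtqueue *vq)
 *	{
 *		struct virtio_softc *vsc = vq->vq_owner;
 *		int slot, len, r = 0;
 *
 *		while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
 *			// sync and complete the request in slot ...
 *			virtio_dequeue_commit(vq, slot);
 *			r = 1;
 *		}
 *		return r;	// nonzero: some entry was consumed
 *	}
 */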
      
      /*
       * Initialize vq structure.
       */
      void
      virtio_init_vq(struct virtio_softc *sc, struct virtqueue *vq)
      {
              int i, j;
              int vq_size = vq->vq_num;
      
              memset(vq->vq_vaddr, 0, vq->vq_bytesize);
      
              /* build the indirect descriptor chain */
              if (vq->vq_indirect != NULL) {
                      struct vring_desc *vd;
      
                      for (i = 0; i < vq_size; i++) {
                              vd = vq->vq_indirect;
                              vd += vq->vq_maxnsegs * i;
                              for (j = 0; j < vq->vq_maxnsegs-1; j++)
                                      vd[j].next = j + 1;
                      }
              }
      
              /* free slot management */
              SLIST_INIT(&vq->vq_freelist);
              /*
         * virtio_enqueue_trim needs monotonically increasing entries,
         * therefore
               * initialize in reverse order
               */
              for (i = vq_size - 1; i >= 0; i--) {
                      SLIST_INSERT_HEAD(&vq->vq_freelist, &vq->vq_entries[i],
                          qe_list);
                      vq->vq_entries[i].qe_index = i;
              }
      
              /* enqueue/dequeue status */
              vq->vq_avail_idx = 0;
              vq->vq_used_idx = 0;
              vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
              vq_sync_uring(sc, vq, BUS_DMASYNC_PREREAD);
              vq->vq_queued = 1;
      }
      
      /*
       * Allocate/free a vq.
       *
       * maxnsegs denotes how much space should be allocated for indirect
 * descriptors. maxnsegs == 1 can be used to disable the use of indirect
 * descriptors for this queue.
       */
      int
      virtio_alloc_vq(struct virtio_softc *sc, struct virtqueue *vq, int index,
          int maxsegsize, int maxnsegs, const char *name)
      {
              int vq_size, allocsize1, allocsize2, allocsize3, allocsize = 0;
              int rsegs, r, hdrlen;
      #define VIRTQUEUE_ALIGN(n)        (((n)+(VIRTIO_PAGE_SIZE-1))&        \
                                       ~(VIRTIO_PAGE_SIZE-1))
      
              memset(vq, 0, sizeof(*vq));
      
              vq_size = virtio_read_queue_size(sc, index);
              if (vq_size == 0) {
                      printf("virtqueue not exist, index %d for %s\n", index, name);
                      goto err;
              }
              if (((vq_size - 1) & vq_size) != 0)
                      panic("vq_size not power of two: %d", vq_size);
      
              hdrlen = virtio_has_feature(sc, VIRTIO_F_RING_EVENT_IDX) ? 3 : 2;
      
              /* allocsize1: descriptor table + avail ring + pad */
              allocsize1 = VIRTQUEUE_ALIGN(sizeof(struct vring_desc) * vq_size
                  + sizeof(uint16_t) * (hdrlen + vq_size));
              /* allocsize2: used ring + pad */
              allocsize2 = VIRTQUEUE_ALIGN(sizeof(uint16_t) * hdrlen
                  + sizeof(struct vring_used_elem) * vq_size);
              /* allocsize3: indirect table */
              if (sc->sc_indirect && maxnsegs > 1)
                      allocsize3 = sizeof(struct vring_desc) * maxnsegs * vq_size;
              else
                      allocsize3 = 0;
              allocsize = allocsize1 + allocsize2 + allocsize3;
      
              /* alloc and map the memory */
              r = bus_dmamem_alloc(sc->sc_dmat, allocsize, VIRTIO_PAGE_SIZE, 0,
                  &vq->vq_segs[0], 1, &rsegs, BUS_DMA_NOWAIT);
              if (r != 0) {
                      printf("virtqueue %d for %s allocation failed, error %d\n",
                             index, name, r);
                      goto err;
              }
              r = bus_dmamem_map(sc->sc_dmat, &vq->vq_segs[0], 1, allocsize,
                  (caddr_t*)&vq->vq_vaddr, BUS_DMA_NOWAIT);
              if (r != 0) {
                      printf("virtqueue %d for %s map failed, error %d\n", index,
                          name, r);
                      goto err;
              }
              r = bus_dmamap_create(sc->sc_dmat, allocsize, 1, allocsize, 0,
                  BUS_DMA_NOWAIT, &vq->vq_dmamap);
              if (r != 0) {
                      printf("virtqueue %d for %s dmamap creation failed, "
                          "error %d\n", index, name, r);
                      goto err;
              }
              r = bus_dmamap_load(sc->sc_dmat, vq->vq_dmamap, vq->vq_vaddr,
                  allocsize, NULL, BUS_DMA_NOWAIT);
              if (r != 0) {
                      printf("virtqueue %d for %s dmamap load failed, error %d\n",
                          index, name, r);
                      goto err;
              }
      
              /* remember addresses and offsets for later use */
              vq->vq_owner = sc;
              vq->vq_num = vq_size;
              vq->vq_mask = vq_size - 1;
              vq->vq_index = index;
              vq->vq_desc = vq->vq_vaddr;
              vq->vq_availoffset = sizeof(struct vring_desc)*vq_size;
              vq->vq_avail = (struct vring_avail*)(((char*)vq->vq_desc) +
                  vq->vq_availoffset);
              vq->vq_usedoffset = allocsize1;
              vq->vq_used = (struct vring_used*)(((char*)vq->vq_desc) +
                  vq->vq_usedoffset);
              if (allocsize3 > 0) {
                      vq->vq_indirectoffset = allocsize1 + allocsize2;
                      vq->vq_indirect = (void*)(((char*)vq->vq_desc)
                          + vq->vq_indirectoffset);
              }
              vq->vq_bytesize = allocsize;
              vq->vq_maxnsegs = maxnsegs;
      
              /* free slot management */
              vq->vq_entries = mallocarray(vq_size, sizeof(struct vq_entry),
                  M_DEVBUF, M_NOWAIT | M_ZERO);
              if (vq->vq_entries == NULL) {
                      r = ENOMEM;
                      goto err;
              }
      
              virtio_init_vq(sc, vq);
              virtio_setup_queue(sc, vq, vq->vq_dmamap->dm_segs[0].ds_addr);
      
      #if VIRTIO_DEBUG
              printf("\nallocated %u byte for virtqueue %d for %s, size %d\n",
                  allocsize, index, name, vq_size);
              if (allocsize3 > 0)
                      printf("using %d byte (%d entries) indirect descriptors\n",
                          allocsize3, maxnsegs * vq_size);
      #endif
              return 0;
      
      err:
              if (vq->vq_dmamap)
                      bus_dmamap_destroy(sc->sc_dmat, vq->vq_dmamap);
              if (vq->vq_vaddr)
                      bus_dmamem_unmap(sc->sc_dmat, vq->vq_vaddr, allocsize);
              if (vq->vq_segs[0].ds_addr)
                      bus_dmamem_free(sc->sc_dmat, &vq->vq_segs[0], 1);
              memset(vq, 0, sizeof(*vq));
      
              return -1;
      }
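
/*
 * The resulting single allocation is laid out as computed above:
 *
 *	0			descriptor table: vq_num vring_desc slots
 *	vq_availoffset		avail ring: flags, idx, ring[vq_num]
 *				(hdrlen is 3 when RingEventIdx adds used_event)
 *	vq_usedoffset		used ring: flags, idx, ring[vq_num]
 *	vq_indirectoffset	indirect tables: vq_num * maxnsegs descriptors
 *
 * with the first two regions padded to VIRTIO_PAGE_SIZE boundaries.
 */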
      
      int
      virtio_free_vq(struct virtio_softc *sc, struct virtqueue *vq)
      {
              struct vq_entry *qe;
              int i = 0;
      
              /* device must be already deactivated */
              /* confirm the vq is empty */
              SLIST_FOREACH(qe, &vq->vq_freelist, qe_list) {
                      i++;
              }
              if (i != vq->vq_num) {
                      printf("%s: freeing non-empty vq, index %d\n",
                          sc->sc_dev.dv_xname, vq->vq_index);
                      return EBUSY;
              }
      
              /* tell device that there's no virtqueue any longer */
              virtio_setup_queue(sc, vq, 0);
      
              free(vq->vq_entries, M_DEVBUF, 0);
              bus_dmamap_unload(sc->sc_dmat, vq->vq_dmamap);
              bus_dmamap_destroy(sc->sc_dmat, vq->vq_dmamap);
              bus_dmamem_unmap(sc->sc_dmat, vq->vq_vaddr, vq->vq_bytesize);
              bus_dmamem_free(sc->sc_dmat, &vq->vq_segs[0], 1);
              memset(vq, 0, sizeof(*vq));
      
              return 0;
      }
      
      /*
       * Free descriptor management.
       */
      struct vq_entry *
      vq_alloc_entry(struct virtqueue *vq)
      {
              struct vq_entry *qe;
      
              if (SLIST_EMPTY(&vq->vq_freelist))
                      return NULL;
              qe = SLIST_FIRST(&vq->vq_freelist);
              SLIST_REMOVE_HEAD(&vq->vq_freelist, qe_list);
      
              return qe;
      }
      
      void
      vq_free_entry(struct virtqueue *vq, struct vq_entry *qe)
      {
              SLIST_INSERT_HEAD(&vq->vq_freelist, qe, qe_list);
      }
      
      /*
       * Enqueue several dmamaps as a single request.
       */
      /*
       * Typical usage:
 *  <queue size> instances of each of the following are stored in arrays:
       *  - command blocks (in dmamem) should be pre-allocated and mapped
       *  - dmamaps for command blocks should be pre-allocated and loaded
       *  - dmamaps for payload should be pre-allocated
       *        r = virtio_enqueue_prep(sc, vq, &slot);                // allocate a slot
       *        if (r)                // currently 0 or EAGAIN
       *          return r;
       *        r = bus_dmamap_load(dmat, dmamap_payload[slot], data, count, ..);
       *        if (r) {
       *          virtio_enqueue_abort(sc, vq, slot);
       *          bus_dmamap_unload(dmat, dmamap_payload[slot]);
       *          return r;
       *        }
       *        r = virtio_enqueue_reserve(sc, vq, slot,
       *                                   dmamap_payload[slot]->dm_nsegs+1);
       *                                                        // ^ +1 for command
       *        if (r) {        // currently 0 or EAGAIN
       *          bus_dmamap_unload(dmat, dmamap_payload[slot]);
       *          return r;                                        // do not call abort()
       *        }
       *        <setup and prepare commands>
       *        bus_dmamap_sync(dmat, dmamap_cmd[slot],... BUS_DMASYNC_PREWRITE);
       *        bus_dmamap_sync(dmat, dmamap_payload[slot],...);
       *        virtio_enqueue(sc, vq, slot, dmamap_cmd[slot], 0);
       *        virtio_enqueue(sc, vq, slot, dmamap_payload[slot], iswrite);
       *        virtio_enqueue_commit(sc, vq, slot, 1);
       *
       * Alternative usage with statically allocated slots:
       *        <during initialization>
       *        // while not out of slots, do
       *        virtio_enqueue_prep(sc, vq, &slot);                // allocate a slot
 *        virtio_enqueue_reserve(sc, vq, slot, max_segs);        // reserve all
 *                                                // slots that may ever be needed
       *
 *        <when enqueuing a request>
       *        // Don't call virtio_enqueue_prep()
       *        bus_dmamap_load(dmat, dmamap_payload[slot], data, count, ..);
       *        bus_dmamap_sync(dmat, dmamap_cmd[slot],... BUS_DMASYNC_PREWRITE);
       *        bus_dmamap_sync(dmat, dmamap_payload[slot],...);
       *        virtio_enqueue_trim(sc, vq, slot, num_segs_needed);
       *        virtio_enqueue(sc, vq, slot, dmamap_cmd[slot], 0);
       *        virtio_enqueue(sc, vq, slot, dmamap_payload[slot], iswrite);
       *        virtio_enqueue_commit(sc, vq, slot, 1);
       *
       *        <when dequeuing>
       *        // don't call virtio_dequeue_commit()
       */
      
      /*
       * enqueue_prep: allocate a slot number
       */
      int
      virtio_enqueue_prep(struct virtqueue *vq, int *slotp)
      {
              struct vq_entry *qe1;
      
              VIRTIO_ASSERT(slotp != NULL);
      
              qe1 = vq_alloc_entry(vq);
              if (qe1 == NULL)
                      return EAGAIN;
              /* next slot is not allocated yet */
              qe1->qe_next = -1;
              *slotp = qe1->qe_index;
      
              return 0;
      }
      
      /*
       * enqueue_reserve: allocate remaining slots and build the descriptor chain.
       * Calls virtio_enqueue_abort() on failure.
       */
      int
      virtio_enqueue_reserve(struct virtqueue *vq, int slot, int nsegs)
      {
              struct vq_entry *qe1 = &vq->vq_entries[slot];
      
              VIRTIO_ASSERT(qe1->qe_next == -1);
              VIRTIO_ASSERT(1 <= nsegs && nsegs <= vq->vq_num);
      
              if (vq->vq_indirect != NULL && nsegs > 1 && nsegs <= vq->vq_maxnsegs) {
                      struct vring_desc *vd;
                      int i;
      
                      qe1->qe_indirect = 1;
      
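                /*
                 * Each slot owns a private table of vq_maxnsegs indirect
                 * descriptors.  A single ring descriptor is pointed at the
                 * slot's table below, and the first nsegs table entries are
                 * chained together.
                 */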
                      vd = &vq->vq_desc[qe1->qe_index];
                      vd->addr = vq->vq_dmamap->dm_segs[0].ds_addr +
                          vq->vq_indirectoffset;
                      vd->addr += sizeof(struct vring_desc) * vq->vq_maxnsegs *
                          qe1->qe_index;
                      vd->len = sizeof(struct vring_desc) * nsegs;
                      vd->flags = VRING_DESC_F_INDIRECT;
      
                      vd = vq->vq_indirect;
                      vd += vq->vq_maxnsegs * qe1->qe_index;
                      qe1->qe_desc_base = vd;
      
                      for (i = 0; i < nsegs-1; i++)
                              vd[i].flags = VRING_DESC_F_NEXT;
                      vd[i].flags = 0;
                      qe1->qe_next = 0;
      
                      return 0;
              } else {
                      struct vring_desc *vd;
                      struct vq_entry *qe;
                      int i, s;
      
                      qe1->qe_indirect = 0;
      
                      vd = &vq->vq_desc[0];
                      qe1->qe_desc_base = vd;
                      qe1->qe_next = qe1->qe_index;
                      s = slot;
                      for (i = 0; i < nsegs - 1; i++) {
                              qe = vq_alloc_entry(vq);
                              if (qe == NULL) {
                                      vd[s].flags = 0;
                                      virtio_enqueue_abort(vq, slot);
                                      return EAGAIN;
                              }
                              vd[s].flags = VRING_DESC_F_NEXT;
                              vd[s].next = qe->qe_index;
                              s = qe->qe_index;
                      }
                      vd[s].flags = 0;
      
                      return 0;
              }
      }
      
      /*
       * enqueue: enqueue a single dmamap.
       */
      int
      virtio_enqueue(struct virtqueue *vq, int slot, bus_dmamap_t dmamap, int write)
{
              struct vq_entry *qe1 = &vq->vq_entries[slot];
              struct vring_desc *vd = qe1->qe_desc_base;
              int i;
              int s = qe1->qe_next;
      
              VIRTIO_ASSERT(s >= 0);
              VIRTIO_ASSERT(dmamap->dm_nsegs > 0);
              if (dmamap->dm_nsegs > vq->vq_maxnsegs) {
      #if VIRTIO_DEBUG
                      for (i = 0; i < dmamap->dm_nsegs; i++) {
                              printf(" %d (%d): %p %lx \n", i, write,
                                  (void *)dmamap->dm_segs[i].ds_addr,
                                  dmamap->dm_segs[i].ds_len);
                      }
      #endif
                panic("dmamap->dm_nsegs %d > vq->vq_maxnsegs %d",
                    dmamap->dm_nsegs, vq->vq_maxnsegs);
              }
      
        for (i = 0; i < dmamap->dm_nsegs; i++) {
                      vd[s].addr = dmamap->dm_segs[i].ds_addr;
                      vd[s].len = dmamap->dm_segs[i].ds_len;
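                /*
                 * VRING_DESC_F_WRITE is from the device's point of view:
                 * it marks buffers the device writes into (driver reads).
                 */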
                if (!write)
                        vd[s].flags |= VRING_DESC_F_WRITE;
                      s = vd[s].next;
              }
              qe1->qe_next = s;
      
              return 0;
      }
      
      int
      virtio_enqueue_p(struct virtqueue *vq, int slot, bus_dmamap_t dmamap,
          bus_addr_t start, bus_size_t len, int write)
{
              struct vq_entry *qe1 = &vq->vq_entries[slot];
              struct vring_desc *vd = qe1->qe_desc_base;
              int s = qe1->qe_next;
      
              VIRTIO_ASSERT(s >= 0);
              /* XXX todo: handle more segments */
              VIRTIO_ASSERT(dmamap->dm_nsegs == 1);
              VIRTIO_ASSERT((dmamap->dm_segs[0].ds_len > start) &&
                  (dmamap->dm_segs[0].ds_len >= start + len));
      
              vd[s].addr = dmamap->dm_segs[0].ds_addr + start;
              vd[s].len = len;
        if (!write)
                vd[s].flags |= VRING_DESC_F_WRITE;
              qe1->qe_next = vd[s].next;
      
              return 0;
      }
      
      static void
      publish_avail_idx(struct virtio_softc *sc, struct virtqueue *vq)
      {
              vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
      
              virtio_membar_producer();
              vq->vq_avail->idx = vq->vq_avail_idx;
              vq_sync_aring(sc, vq, BUS_DMASYNC_POSTWRITE);
              vq->vq_queued = 1;
      }
      
      /*
       * enqueue_commit: add it to the aring.
       */
      void
      virtio_enqueue_commit(struct virtio_softc *sc, struct virtqueue *vq, int slot,
          int notifynow)
{
              struct vq_entry *qe1;
      
              if (slot < 0)
                      goto notify;
              vq_sync_descs(sc, vq, BUS_DMASYNC_PREWRITE);
              qe1 = &vq->vq_entries[slot];
        if (qe1->qe_indirect)
                      vq_sync_indirect(sc, vq, slot, BUS_DMASYNC_PREWRITE);
              vq->vq_avail->ring[(vq->vq_avail_idx++) & vq->vq_mask] = slot;
      
      notify:
              if (notifynow) {
                      if (virtio_has_feature(vq->vq_owner, VIRTIO_F_RING_EVENT_IDX)) {
                              uint16_t o = vq->vq_avail->idx;
                              uint16_t n = vq->vq_avail_idx;
                              uint16_t t;
                              publish_avail_idx(sc, vq);
      
                              virtio_membar_sync();
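                        /*
                         * With the event index feature the device asks to be
                         * kicked only when the avail index passes avail_event.
                         * Kick iff avail_event + 1 falls within the window
                         * (o, n] just published; the unsigned 16-bit
                         * subtractions keep the comparison correct across
                         * index wraparound.
                         */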
                              t = VQ_AVAIL_EVENT(vq) + 1;
                              if ((uint16_t)(n - t) < (uint16_t)(n - o))
                                      sc->sc_ops->kick(sc, vq->vq_index);
                      } else {
                              publish_avail_idx(sc, vq);
      
                              virtio_membar_sync();
                        if (!(vq->vq_used->flags & VRING_USED_F_NO_NOTIFY))
                                sc->sc_ops->kick(sc, vq->vq_index);
                      }
              }
      }
      
      /*
       * enqueue_abort: rollback.
       */
      int
      virtio_enqueue_abort(struct virtqueue *vq, int slot)
      {
              struct vq_entry *qe = &vq->vq_entries[slot];
              struct vring_desc *vd;
              int s;
      
              if (qe->qe_next < 0) {
                      vq_free_entry(vq, qe);
                      return 0;
              }
      
              s = slot;
              vd = &vq->vq_desc[0];
              while (vd[s].flags & VRING_DESC_F_NEXT) {
                      s = vd[s].next;
                      vq_free_entry(vq, qe);
                      qe = &vq->vq_entries[s];
              }
              vq_free_entry(vq, qe);
              return 0;
      }
      
/*
 * enqueue_trim: adjust a pre-reserved descriptor chain to the given
 * number of segments, a.k.a. descriptors.
 */
      void
      virtio_enqueue_trim(struct virtqueue *vq, int slot, int nsegs)
{
              struct vq_entry *qe1 = &vq->vq_entries[slot];
              struct vring_desc *vd = &vq->vq_desc[0];
              int i;
      
              if ((vd[slot].flags & VRING_DESC_F_INDIRECT) == 0) {
                qe1->qe_next = qe1->qe_index;
                      /*
                       * N.B.: the vq_entries are ASSUMED to be a contiguous
                       *       block with slot being the index to the first one.
                       */
              } else {
                      qe1->qe_next = 0;
                      vd = &vq->vq_desc[qe1->qe_index];
                      vd->len = sizeof(struct vring_desc) * nsegs;
                      vd = qe1->qe_desc_base;
                      slot = 0;
              }
      
        for (i = 0; i < nsegs - 1; i++) {
                vd[slot].flags = VRING_DESC_F_NEXT;
                      slot++;
              }
              vd[slot].flags = 0;
      }
      
      /*
       * Dequeue a request.
       */
      /*
       * dequeue: dequeue a request from uring; dmamap_sync for uring is
       *            already done in the interrupt handler.
       */
      int
      virtio_dequeue(struct virtio_softc *sc, struct virtqueue *vq,
          int *slotp, int *lenp)
      {
              uint16_t slot, usedidx;
              struct vq_entry *qe;
      
              if (vq->vq_used_idx == vq->vq_used->idx)
                      return ENOENT;
              usedidx = vq->vq_used_idx++;
              usedidx &= vq->vq_mask;
      
              virtio_membar_consumer();
              slot = vq->vq_used->ring[usedidx].id;
              qe = &vq->vq_entries[slot];
      
              if (qe->qe_indirect)
                      vq_sync_indirect(sc, vq, slot, BUS_DMASYNC_POSTWRITE);
      
              if (slotp)
                      *slotp = slot;
              if (lenp)
                      *lenp = vq->vq_used->ring[usedidx].len;
      
              return 0;
      }
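
/*
 * Typical dequeue loop in a device interrupt handler (a sketch in the
 * spirit of the enqueue example above; dmat, dmamap_payload[] and
 * finish_request() are hypothetical):
 *
 *        while (virtio_dequeue(sc, vq, &slot, &len) == 0) {
 *                bus_dmamap_sync(dmat, dmamap_payload[slot], 0,
 *                    dmamap_payload[slot]->dm_mapsize,
 *                    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 *                bus_dmamap_unload(dmat, dmamap_payload[slot]);
 *                finish_request(slot, len);
 *                virtio_dequeue_commit(vq, slot);
 *        }
 */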
      
      /*
       * dequeue_commit: complete dequeue; the slot is recycled for future use.
       *                 if you forget to call this the slot will be leaked.
       *
       *                 Don't call this if you use statically allocated slots
       *                 and virtio_dequeue_trim().
       */
      int
      virtio_dequeue_commit(struct virtqueue *vq, int slot)
      {
              struct vq_entry *qe = &vq->vq_entries[slot];
              struct vring_desc *vd = &vq->vq_desc[0];
              int s = slot;
      
              while (vd[s].flags & VRING_DESC_F_NEXT) {
                      s = vd[s].next;
                      vq_free_entry(vq, qe);
                      qe = &vq->vq_entries[s];
              }
              vq_free_entry(vq, qe);
      
              return 0;
      }
      
      /*
       * Increase the event index in order to delay interrupts.
       * Returns 0 on success; returns 1 if the used ring has already advanced
 * too far, and the caller must process the queue again (otherwise, no
       * more interrupts will happen).
       */
      int
      virtio_postpone_intr(struct virtqueue *vq, uint16_t nslots)
      {
              uint16_t        idx;
      
              idx = vq->vq_used_idx + nslots;
      
              /* set the new event index: avail_ring->used_event = idx */
              VQ_USED_EVENT(vq) = idx;
              virtio_membar_sync();
      
              vq_sync_aring(vq->vq_owner, vq, BUS_DMASYNC_PREWRITE);
              vq->vq_queued++;
      
              if (nslots < virtio_nused(vq))
                      return 1;
      
              return 0;
      }
      
      /*
       * Postpone interrupt until 3/4 of the available descriptors have been
       * consumed.
       */
      int
      virtio_postpone_intr_smart(struct virtqueue *vq)
      {
              uint16_t        nslots;
      
              nslots = (uint16_t)(vq->vq_avail->idx - vq->vq_used_idx) * 3 / 4;
      
              return virtio_postpone_intr(vq, nslots);
      }
      
      /*
       * Postpone interrupt until all of the available descriptors have been
       * consumed.
       */
      int
      virtio_postpone_intr_far(struct virtqueue *vq)
      {
              uint16_t        nslots;
      
              nslots = (uint16_t)(vq->vq_avail->idx - vq->vq_used_idx);
      
              return virtio_postpone_intr(vq, nslots);
      }
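
/*
 * Usage sketch for the postpone functions: from the interrupt handler,
 * after draining the used ring, delay the next interrupt and rescan if
 * the device raced ahead in the meantime:
 *
 *        again:
 *        <drain the used ring>
 *        if (virtio_postpone_intr_smart(vq))
 *                goto again;
 */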
      
      
      /*
       * Start/stop vq interrupt.  No guarantee.
       */
      void
      virtio_stop_vq_intr(struct virtio_softc *sc, struct virtqueue *vq)
      {
              if (virtio_has_feature(sc, VIRTIO_F_RING_EVENT_IDX)) {
                      /*
                       * No way to disable the interrupt completely with
                       * RingEventIdx. Instead advance used_event by half
                       * the possible value. This won't happen soon and
                 * is far enough in the past to not trigger a spurious
                       * interrupt.
                       */
                      VQ_USED_EVENT(vq) = vq->vq_used_idx + 0x8000;
              } else {
                      vq->vq_avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
              }
              vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
              vq->vq_queued++;
      }
      
      int
      virtio_start_vq_intr(struct virtio_softc *sc, struct virtqueue *vq)
      {
              /*
               * If event index feature is negotiated, enabling
               * interrupts is done through setting the latest
               * consumed index in the used_event field
               */
              if (virtio_has_feature(sc, VIRTIO_F_RING_EVENT_IDX))
                      VQ_USED_EVENT(vq) = vq->vq_used_idx;
              else
                      vq->vq_avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
      
              virtio_membar_sync();
      
              vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
              vq->vq_queued++;
      
              if (vq->vq_used_idx != vq->vq_used->idx)
                      return 1;
      
              return 0;
      }
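
/*
 * Usage sketch (handle_slot() is a hypothetical per-request handler):
 *
 *        virtio_stop_vq_intr(sc, vq);
 *        while (virtio_dequeue(sc, vq, &slot, &len) == 0)
 *                handle_slot(sc, vq, slot, len);
 *        if (virtio_start_vq_intr(sc, vq))
 *                <process the queue once more>
 */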
      
/*
 * Returns the number of slots in the used ring that the device has
 * returned but the driver has not yet dequeued.
 */
      int
      virtio_nused(struct virtqueue *vq)
      {
              uint16_t        n;
      
              n = (uint16_t)(vq->vq_used->idx - vq->vq_used_idx);
              VIRTIO_ASSERT(n <= vq->vq_num);
      
              return n;
      }
      
      #if VIRTIO_DEBUG
      void
      virtio_vq_dump(struct virtqueue *vq)
      {
              /* Common fields */
              printf(" + vq num: %d\n", vq->vq_num);
              printf(" + vq mask: 0x%X\n", vq->vq_mask);
              printf(" + vq index: %d\n", vq->vq_index);
              printf(" + vq used idx: %d\n", vq->vq_used_idx);
              printf(" + vq avail idx: %d\n", vq->vq_avail_idx);
              printf(" + vq queued: %d\n",vq->vq_queued);
              /* Avail ring fields */
              printf(" + avail flags: 0x%X\n", vq->vq_avail->flags);
              printf(" + avail idx: %d\n", vq->vq_avail->idx);
              printf(" + avail event: %d\n", VQ_AVAIL_EVENT(vq));
              /* Used ring fields */
              printf(" + used flags: 0x%X\n",vq->vq_used->flags);
              printf(" + used idx: %d\n",vq->vq_used->idx);
              printf(" + used event: %d\n", VQ_USED_EVENT(vq));
              printf(" +++++++++++++++++++++++++++\n");
      }
      #endif
      /*        $OpenBSD: endian.h,v 1.7 2018/10/02 21:30:44 naddy Exp $        */
      
      /*-
       * Copyright (c) 1997 Niklas Hallqvist.  All rights reserved.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       *
       * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
       * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
       * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
       * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
       * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
       * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
       * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
       * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
       * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
       * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
       */
      
      #ifndef _MACHINE_ENDIAN_H_
      #define _MACHINE_ENDIAN_H_
      
      #ifndef __FROM_SYS__ENDIAN
      #include <sys/_types.h>
      #endif
      
      static __inline __uint16_t
__swap16md(__uint16_t _x)
      {
              __asm ("rorw $8, %w0" : "+r" (_x));
              return (_x);
      }
      
      static __inline __uint32_t
      __swap32md(__uint32_t _x)
      {
              __asm ("bswap %0" : "+r" (_x));
              return (_x);
      }
      
      static __inline __uint64_t
      __swap64md(__uint64_t _x)
      {
              __asm ("bswapq %0" : "+r" (_x));
              return (_x);
      }
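
/*
 * Example: __swap16md(0x1234) yields 0x3412 and __swap32md(0x11223344)
 * yields 0x44332211.  With __HAVE_MD_SWAP defined below, <sys/endian.h>
 * layers the generic swap and byte-order conversion macros on top of
 * these single-instruction variants.
 */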
      
      /* Tell sys/endian.h we have MD variants of the swap macros.  */
      #define __HAVE_MD_SWAP
      
      #define _BYTE_ORDER _LITTLE_ENDIAN
      
      #ifndef __FROM_SYS__ENDIAN
      #include <sys/endian.h>
      #endif
      
      #endif /* _MACHINE_ENDIAN_H_ */
      /*        $OpenBSD: icmp6.c,v 1.229 2018/12/25 19:28:25 denis Exp $        */
      /*        $KAME: icmp6.c,v 1.217 2001/06/20 15:03:29 jinmei Exp $        */
      
      /*
       * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
       * All rights reserved.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       * 3. Neither the name of the project nor the names of its contributors
       *    may be used to endorse or promote products derived from this software
       *    without specific prior written permission.
       *
       * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
       * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
       * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
       * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
       * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
       * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
       * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
       * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
       * SUCH DAMAGE.
       */
      
      /*
       * Copyright (c) 1982, 1986, 1988, 1993
       *        The Regents of the University of California.  All rights reserved.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       * 3. Neither the name of the University nor the names of its contributors
       *    may be used to endorse or promote products derived from this software
       *    without specific prior written permission.
       *
       * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
       * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
       * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
       * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
       * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
       * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
       * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
       * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
       * SUCH DAMAGE.
       *
       *        @(#)ip_icmp.c        8.2 (Berkeley) 1/4/94
       */
      
      #include "carp.h"
      #include "pf.h"
      
      #include <sys/param.h>
      #include <sys/systm.h>
      #include <sys/malloc.h>
      #include <sys/mbuf.h>
      #include <sys/sysctl.h>
      #include <sys/protosw.h>
      #include <sys/socket.h>
      #include <sys/socketvar.h>
      #include <sys/time.h>
      #include <sys/kernel.h>
      #include <sys/syslog.h>
      #include <sys/domain.h>
      
      #include <net/if.h>
      #include <net/if_var.h>
      #include <net/route.h>
      #include <net/if_dl.h>
      #include <net/if_types.h>
      
      #include <netinet/in.h>
      #include <netinet/ip.h>
      #include <netinet6/in6_var.h>
      #include <netinet/ip6.h>
      #include <netinet6/ip6_var.h>
      #include <netinet/icmp6.h>
      #include <netinet6/mld6_var.h>
      #include <netinet/in_pcb.h>
      #include <netinet6/nd6.h>
      #include <netinet6/ip6protosw.h>
      
      #if NCARP > 0
      #include <netinet/ip_carp.h>
      #endif
      
      #if NPF > 0
      #include <net/pfvar.h>
      #endif
      
      struct cpumem *icmp6counters;
      
      extern int icmp6errppslim;
      static int icmp6errpps_count = 0;
      static struct timeval icmp6errppslim_last;
      
      /*
       * List of callbacks to notify when Path MTU changes are made.
       */
      struct icmp6_mtudisc_callback {
              LIST_ENTRY(icmp6_mtudisc_callback) mc_list;
              void (*mc_func)(struct sockaddr_in6 *, u_int);
      };
      
      LIST_HEAD(, icmp6_mtudisc_callback) icmp6_mtudisc_callbacks =
          LIST_HEAD_INITIALIZER(icmp6_mtudisc_callbacks);
      
      struct rttimer_queue *icmp6_mtudisc_timeout_q = NULL;
      
      /* XXX do these values make any sense? */
      static int icmp6_mtudisc_hiwat = 1280;
      static int icmp6_mtudisc_lowat = 256;
      
      /*
       * keep track of # of redirect routes.
       */
      static struct rttimer_queue *icmp6_redirect_timeout_q = NULL;
      
      /* XXX experimental, turned off */
      static int icmp6_redirect_lowat = -1;
      
      void        icmp6_errcount(int, int);
      int        icmp6_ratelimit(const struct in6_addr *, const int, const int);
      const char *icmp6_redirect_diag(struct in6_addr *, struct in6_addr *,
                  struct in6_addr *);
      int        icmp6_notify_error(struct mbuf *, int, int, int);
      struct rtentry *icmp6_mtudisc_clone(struct sockaddr *, u_int);
      void        icmp6_mtudisc_timeout(struct rtentry *, struct rttimer *);
      void        icmp6_redirect_timeout(struct rtentry *, struct rttimer *);
      
      void
      icmp6_init(void)
      {
              mld6_init();
              icmp6_mtudisc_timeout_q = rt_timer_queue_create(ip6_mtudisc_timeout);
              icmp6_redirect_timeout_q = rt_timer_queue_create(icmp6_redirtimeout);
              icmp6counters = counters_alloc(icp6s_ncounters);
      }
      
      void
      icmp6_errcount(int type, int code)
      {
              enum icmp6stat_counters c = icp6s_ounknown;
      
              switch (type) {
              case ICMP6_DST_UNREACH:
                switch (code) {
                      case ICMP6_DST_UNREACH_NOROUTE:
                              c = icp6s_odst_unreach_noroute;
                              break;
                      case ICMP6_DST_UNREACH_ADMIN:
                              c = icp6s_odst_unreach_admin;
                              break;
                      case ICMP6_DST_UNREACH_BEYONDSCOPE:
                              c = icp6s_odst_unreach_beyondscope;
                              break;
                      case ICMP6_DST_UNREACH_ADDR:
                              c = icp6s_odst_unreach_addr;
                              break;
                      case ICMP6_DST_UNREACH_NOPORT:
                              c = icp6s_odst_unreach_noport;
                              break;
                      }
                      break;
              case ICMP6_PACKET_TOO_BIG:
                      c = icp6s_opacket_too_big;
                      break;
              case ICMP6_TIME_EXCEEDED:
                      switch (code) {
                      case ICMP6_TIME_EXCEED_TRANSIT:
                              c = icp6s_otime_exceed_transit;
                              break;
                      case ICMP6_TIME_EXCEED_REASSEMBLY:
                              c = icp6s_otime_exceed_reassembly;
                              break;
                      }
                      break;
              case ICMP6_PARAM_PROB:
                switch (code) {
                      case ICMP6_PARAMPROB_HEADER:
                              c = icp6s_oparamprob_header;
                              break;
                      case ICMP6_PARAMPROB_NEXTHEADER:
                              c = icp6s_oparamprob_nextheader;
                              break;
                      case ICMP6_PARAMPROB_OPTION:
                              c = icp6s_oparamprob_option;
                              break;
                      }
                      break;
              case ND_REDIRECT:
                      c = icp6s_oredirect;
                      break;
              }
      
              icmp6stat_inc(c);
      }
      
      /*
       * Register a Path MTU Discovery callback.
       */
      void
      icmp6_mtudisc_callback_register(void (*func)(struct sockaddr_in6 *, u_int))
      {
              struct icmp6_mtudisc_callback *mc;
      
              LIST_FOREACH(mc, &icmp6_mtudisc_callbacks, mc_list) {
                      if (mc->mc_func == func)
                              return;
              }
      
              mc = malloc(sizeof(*mc), M_PCB, M_NOWAIT);
              if (mc == NULL)
                      panic("icmp6_mtudisc_callback_register");
      
              mc->mc_func = func;
              LIST_INSERT_HEAD(&icmp6_mtudisc_callbacks, mc, mc_list);
      }
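
/*
 * Usage sketch (my_proto_mtudisc() is a hypothetical handler): an
 * upper-layer protocol registers its handler once at initialization:
 *
 *        void my_proto_mtudisc(struct sockaddr_in6 *, u_int);
 *        ...
 *        icmp6_mtudisc_callback_register(my_proto_mtudisc);
 *
 * Registered handlers are then called for a destination whenever a
 * Path MTU change is recorded for it.
 */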
      
      struct mbuf *
      icmp6_do_error(struct mbuf *m, int type, int code, int param)
{
              struct ip6_hdr *oip6, *nip6;
              struct icmp6_hdr *icmp6;
              u_int preplen;
              int off;
              int nxt;
      
              icmp6stat_inc(icp6s_error);
      
              /* count per-type-code statistics */
        icmp6_errcount(type, code);
      
        if (m->m_len < sizeof(struct ip6_hdr)) {
                      m = m_pullup(m, sizeof(struct ip6_hdr));
                      if (m == NULL)
                              return (NULL);
              }
              oip6 = mtod(m, struct ip6_hdr *);
      
              /*
               * If the destination address of the erroneous packet is a multicast
               * address, or the packet was sent using link-layer multicast,
               * we should basically suppress sending an error (RFC 2463, Section
               * 2.4).
               * We have two exceptions (the item e.2 in that section):
               * - the Packet Too Big message can be sent for path MTU discovery.
               * - the Parameter Problem Message that can be allowed an icmp6 error
               *   in the option type field.  This check has been done in
               *   ip6_unknown_opt(), so we can just check the type and code.
               */
        if ((m->m_flags & (M_BCAST|M_MCAST) ||
            IN6_IS_ADDR_MULTICAST(&oip6->ip6_dst)) &&
            (type != ICMP6_PACKET_TOO_BIG &&
             (type != ICMP6_PARAM_PROB ||
              code != ICMP6_PARAMPROB_OPTION)))
                      goto freeit;
      
              /*
               * RFC 2463, 2.4 (e.5): source address check.
               * XXX: the case of anycast source?
               */
        if (IN6_IS_ADDR_UNSPECIFIED(&oip6->ip6_src) ||
                  IN6_IS_ADDR_MULTICAST(&oip6->ip6_src))
                      goto freeit;
      
              /*
               * If we are about to send ICMPv6 against ICMPv6 error/redirect,
               * don't do it.
               */
              nxt = -1;
              off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt);
        if (off >= 0 && nxt == IPPROTO_ICMPV6) {
                struct icmp6_hdr *icp;

                IP6_EXTHDR_GET(icp, struct icmp6_hdr *, m, off,
                              sizeof(*icp));
                      if (icp == NULL) {
                              icmp6stat_inc(icp6s_tooshort);
                              return (NULL);
                      }
                if (icp->icmp6_type < ICMP6_ECHO_REQUEST ||
                          icp->icmp6_type == ND_REDIRECT) {
                              /*
                               * ICMPv6 error
                               * Special case: for redirect (which is
                               * informational) we must not send icmp6 error.
                               */
                        icmp6stat_inc(icp6s_canterror);
                              goto freeit;
                      } else {
                              /* ICMPv6 informational - send the error */
                      }
        } else {
                      /* non-ICMPv6 - send the error */
              }
      
              oip6 = mtod(m, struct ip6_hdr *); /* adjust pointer */
      
              /* Finally, do rate limitation check. */
              if (icmp6_ratelimit(&oip6->ip6_src, type, code)) {
                      icmp6stat_inc(icp6s_toofreq);
                      goto freeit;
              }
      
              /*
               * OK, ICMP6 can be generated.
               */
      
        if (m->m_pkthdr.len >= ICMPV6_PLD_MAXLEN)
                m_adj(m, ICMPV6_PLD_MAXLEN - m->m_pkthdr.len);
      
              preplen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
              M_PREPEND(m, preplen, M_DONTWAIT);
        if (m && m->m_len < preplen)
                      m = m_pullup(m, preplen);
              if (m == NULL) {
                      nd6log((LOG_DEBUG, "ENOBUFS in icmp6_error %d\n", __LINE__));
                      return (NULL);
              }
      
              nip6 = mtod(m, struct ip6_hdr *);
              nip6->ip6_src  = oip6->ip6_src;
              nip6->ip6_dst  = oip6->ip6_dst;
      
        if (IN6_IS_SCOPE_EMBED(&oip6->ip6_src))
                oip6->ip6_src.s6_addr16[1] = 0;
        if (IN6_IS_SCOPE_EMBED(&oip6->ip6_dst))
                      oip6->ip6_dst.s6_addr16[1] = 0;
      
              icmp6 = (struct icmp6_hdr *)(nip6 + 1);
              icmp6->icmp6_type = type;
              icmp6->icmp6_code = code;
              icmp6->icmp6_pptr = htonl((u_int32_t)param);
      
              /*
               * icmp6_reflect() is designed to be in the input path.
         * icmp6_error() can be called from both input and output paths,
               * and if we are in output path rcvif could contain bogus value.
               * clear m->m_pkthdr.ph_ifidx for safety, we should have enough
               * scope information in ip header (nip6).
               */
              m->m_pkthdr.ph_ifidx = 0;
      
              icmp6stat_inc(icp6s_outhist + type);
      
              return (m);
      
        freeit:
              /*
         * If we can't tell whether or not we can generate ICMP6, free it.
               */
              return (m_freem(m));
      }
      
      /*
       * Generate an error packet of type error in response to bad IP6 packet.
       */
      void
      icmp6_error(struct mbuf *m, int type, int code, int param)
{
              struct mbuf        *n;
      
              n = icmp6_do_error(m, type, code, param);
        if (n != NULL) {
                /* header order: IPv6 - ICMPv6 */
                if (!icmp6_reflect(n, sizeof(struct ip6_hdr), NULL))
                        ip6_send(n);
              }
}
      
      /*
       * Process a received ICMP6 message.
       */
      int
      icmp6_input(struct mbuf **mp, int *offp, int proto, int af)
{
      #if NCARP > 0
              struct ifnet *ifp;
      #endif
              struct mbuf *m = *mp, *n;
              struct ip6_hdr *ip6, *nip6;
              struct icmp6_hdr *icmp6, *nicmp6;
              int off = *offp;
              int icmp6len = m->m_pkthdr.len - *offp;
              int code, sum, noff;
              char src[INET6_ADDRSTRLEN], dst[INET6_ADDRSTRLEN];
      
              /*
               * Locate icmp6 structure in mbuf, and check
               * that not corrupted and of at least minimum length
               */
      
              ip6 = mtod(m, struct ip6_hdr *);
              if (icmp6len < sizeof(struct icmp6_hdr)) {
                icmp6stat_inc(icp6s_tooshort);
                      goto freeit;
              }
      
              /*
               * calculate the checksum
               */
        IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6));
              if (icmp6 == NULL) {
                      icmp6stat_inc(icp6s_tooshort);
                      return IPPROTO_DONE;
              }
              code = icmp6->icmp6_code;
      
              if ((sum = in6_cksum(m, IPPROTO_ICMPV6, off, icmp6len)) != 0) {
                nd6log((LOG_ERR,
                          "ICMP6 checksum error(%d|%x) %s\n",
                          icmp6->icmp6_type, sum,
                          inet_ntop(AF_INET6, &ip6->ip6_src, src, sizeof(src))));
                      icmp6stat_inc(icp6s_checksum);
                      goto freeit;
              }
      
      #if NPF > 0
        if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) {
                      switch (icmp6->icmp6_type) {
                      /*
                       * These ICMP6 types map to other connections.  They must be
                       * delivered to pr_ctlinput() also for diverted connections.
                       */
                      case ICMP6_DST_UNREACH:
                      case ICMP6_PACKET_TOO_BIG:
                      case ICMP6_TIME_EXCEEDED:
                      case ICMP6_PARAM_PROB:
                              /*
                               * Do not use the divert-to property of the TCP or UDP
                               * rule when doing the PCB lookup for the raw socket.
                               */
                              m->m_pkthdr.pf.flags &=~ PF_TAG_DIVERTED;
                              break;
                      default:
                              goto raw;
                      }
              }
      #endif /* NPF */
      
      #if NCARP > 0
              ifp = if_get(m->m_pkthdr.ph_ifidx);
              if (ifp == NULL)
                      goto freeit;
      
        if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST &&
                  carp_lsdrop(ifp, m, AF_INET6, ip6->ip6_src.s6_addr32,
                  ip6->ip6_dst.s6_addr32, 1)) {
                      if_put(ifp);
                      goto freeit;
              }
      
              if_put(ifp);
      #endif
              icmp6stat_inc(icp6s_inhist + icmp6->icmp6_type);
      
        switch (icmp6->icmp6_type) {
              case ICMP6_DST_UNREACH:
                switch (code) {
                      case ICMP6_DST_UNREACH_NOROUTE:
                              code = PRC_UNREACH_NET;
                              break;
                      case ICMP6_DST_UNREACH_ADMIN:
                              code = PRC_UNREACH_PROTOCOL; /* is this a good code? */
                              break;
                      case ICMP6_DST_UNREACH_ADDR:
                              code = PRC_HOSTDEAD;
                              break;
                      case ICMP6_DST_UNREACH_BEYONDSCOPE:
                              /* I mean "source address was incorrect." */
                              code = PRC_PARAMPROB;
                              break;
                      case ICMP6_DST_UNREACH_NOPORT:
                              code = PRC_UNREACH_PORT;
                              break;
                      default:
                              goto badcode;
                      }
                      goto deliver;
      
              case ICMP6_PACKET_TOO_BIG:
                      /* MTU is checked in icmp6_mtudisc_update. */
                      code = PRC_MSGSIZE;
      
                      /*
                       * Updating the path MTU will be done after examining
                       * intermediate extension headers.
                       */
                      goto deliver;
      
              case ICMP6_TIME_EXCEEDED:
                switch (code) {
                      case ICMP6_TIME_EXCEED_TRANSIT:
                              code = PRC_TIMXCEED_INTRANS;
                              break;
                      case ICMP6_TIME_EXCEED_REASSEMBLY:
                              code = PRC_TIMXCEED_REASS;
                              break;
                      default:
                              goto badcode;
                      }
                      goto deliver;
      
              case ICMP6_PARAM_PROB:
                switch (code) {
                      case ICMP6_PARAMPROB_NEXTHEADER:
                              code = PRC_UNREACH_PROTOCOL;
                              break;
                      case ICMP6_PARAMPROB_HEADER:
                      case ICMP6_PARAMPROB_OPTION:
                              code = PRC_PARAMPROB;
                              break;
                      default:
                              goto badcode;
                      }
                      goto deliver;
      
              case ICMP6_ECHO_REQUEST:
                      if (code != 0)
                              goto badcode;
                      /*
                       * Copy mbuf to send to two data paths: userland socket(s),
                       * and to the querier (echo reply).
                       * m: a copy for socket, n: a copy for querier
                       */
                      if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
                              /* Give up local */
                              n = m;
                              m = *mp = NULL;
                              goto deliverecho;
                      }
                      /*
                       * If the first mbuf is shared, or the first mbuf is too short,
                       * copy the first part of the data into a fresh mbuf.
                       * Otherwise, we will wrongly overwrite both copies.
                       */
                      if ((n->m_flags & M_EXT) != 0 ||
                          n->m_len < off + sizeof(struct icmp6_hdr)) {
                              struct mbuf *n0 = n;
                              const int maxlen = sizeof(*nip6) + sizeof(*nicmp6);
      
                              /*
                               * Prepare an internal mbuf.  m_pullup() doesn't
                               * always copy the length we specified.
                               */
                              if (maxlen >= MCLBYTES) {
                                      /* Give up remote */
                                      m_freem(n0);
                                      break;
                              }
                              MGETHDR(n, M_DONTWAIT, n0->m_type);
                              if (n && maxlen >= MHLEN) {
                                      MCLGET(n, M_DONTWAIT);
                                      if ((n->m_flags & M_EXT) == 0) {
                                              m_free(n);
                                              n = NULL;
                                      }
                              }
                              if (n == NULL) {
                                      /* Give up local */
                                      m_freem(n0);
                                      n = m;
                                      m = *mp = NULL;
                                      goto deliverecho;
                              }
                              M_MOVE_PKTHDR(n, n0);
                              /*
                               * Copy IPv6 and ICMPv6 only.
                               */
                              nip6 = mtod(n, struct ip6_hdr *);
                              bcopy(ip6, nip6, sizeof(struct ip6_hdr));
                              nicmp6 = (struct icmp6_hdr *)(nip6 + 1);
                              bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr));
                              noff = sizeof(struct ip6_hdr);
                              n->m_len = noff + sizeof(struct icmp6_hdr);
                              /*
                               * Adjust mbuf.  ip6_plen will be adjusted in
                               * ip6_output().
                               * n->m_pkthdr.len == n0->m_pkthdr.len at this point.
                               */
                              n->m_pkthdr.len += noff + sizeof(struct icmp6_hdr);
                              n->m_pkthdr.len -= (off + sizeof(struct icmp6_hdr));
                              m_adj(n0, off + sizeof(struct icmp6_hdr));
                              n->m_next = n0;
                      } else {
               deliverecho:
                              IP6_EXTHDR_GET(nicmp6, struct icmp6_hdr *, n, off,
                                  sizeof(*nicmp6));
                              noff = off;
                      }
                      if (n) {
                              nicmp6->icmp6_type = ICMP6_ECHO_REPLY;
                              nicmp6->icmp6_code = 0;
                              icmp6stat_inc(icp6s_reflect);
                              icmp6stat_inc(icp6s_outhist + ICMP6_ECHO_REPLY);
                              if (!icmp6_reflect(n, noff, NULL))
                                      ip6_send(n);
                      }
                      if (!m)
                              goto freeit;
                      break;
      
              case ICMP6_ECHO_REPLY:
                      if (code != 0)
                              goto badcode;
                      break;
      
              case MLD_LISTENER_QUERY:
              case MLD_LISTENER_REPORT:
                      if (icmp6len < sizeof(struct mld_hdr))
                              goto badlen;
                      if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
                              /* give up local */
                              mld6_input(m, off);
                              m = NULL;
                              goto freeit;
                      }
                mld6_input(n, off);
                      /* m stays. */
                      break;
      
              case MLD_LISTENER_DONE:
                      if (icmp6len < sizeof(struct mld_hdr))        /* necessary? */
                              goto badlen;
                      break;                /* nothing to be done in kernel */
      
              case MLD_MTRACE_RESP:
              case MLD_MTRACE:
                      /* XXX: these two are experimental.  not officially defined. */
                      /* XXX: per-interface statistics? */
                      break;                /* just pass it to applications */
      
              case ICMP6_WRUREQUEST:        /* ICMP6_FQDN_QUERY */
                      /* IPv6 Node Information Queries are not supported */
                      break;
              case ICMP6_WRUREPLY:
                      break;
      
              case ND_ROUTER_SOLICIT:
              case ND_ROUTER_ADVERT:
                      if (code != 0)
                              goto badcode;
                      if ((icmp6->icmp6_type == ND_ROUTER_SOLICIT && icmp6len <
                          sizeof(struct nd_router_solicit)) ||
                          (icmp6->icmp6_type == ND_ROUTER_ADVERT && icmp6len <
                          sizeof(struct nd_router_advert)))
                              goto badlen;
      
                      if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
                              /* give up local */
                              nd6_rtr_cache(m, off, icmp6len,
                                  icmp6->icmp6_type);
                              m = NULL;
                              goto freeit;
                      }
                nd6_rtr_cache(n, off, icmp6len, icmp6->icmp6_type);
                      /* m stays. */
                      break;
      
              case ND_NEIGHBOR_SOLICIT:
                      if (code != 0)
                              goto badcode;
                if (icmp6len < sizeof(struct nd_neighbor_solicit))
                              goto badlen;
                      if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
                              /* give up local */
                              nd6_ns_input(m, off, icmp6len);
                              m = NULL;
                              goto freeit;
                      }
                nd6_ns_input(n, off, icmp6len);
                      /* m stays. */
                      break;
      
              case ND_NEIGHBOR_ADVERT:
                      if (code != 0)
                              goto badcode;
                if (icmp6len < sizeof(struct nd_neighbor_advert))
                              goto badlen;
                      if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
                              /* give up local */
                              nd6_na_input(m, off, icmp6len);
                              m = NULL;
                              goto freeit;
                      }
                      nd6_na_input(n, off, icmp6len);
                      /* m stays. */
                      break;
      
              case ND_REDIRECT:
                      if (code != 0)
                              goto badcode;
                      if (icmp6len < sizeof(struct nd_redirect))
                              goto badlen;
                      if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
                              /* give up local */
                              icmp6_redirect_input(m, off);
                              m = NULL;
                              goto freeit;
                      }
                      icmp6_redirect_input(n, off);
                      /* m stays. */
                      break;
      
              case ICMP6_ROUTER_RENUMBERING:
                      if (code != ICMP6_ROUTER_RENUMBERING_COMMAND &&
                          code != ICMP6_ROUTER_RENUMBERING_RESULT)
                              goto badcode;
                      if (icmp6len < sizeof(struct icmp6_router_renum))
                              goto badlen;
                      break;
      
              default:
                nd6log((LOG_DEBUG,
                          "icmp6_input: unknown type %d(src=%s, dst=%s, ifid=%u)\n",
                          icmp6->icmp6_type,
                          inet_ntop(AF_INET6, &ip6->ip6_src, src, sizeof(src)),
                          inet_ntop(AF_INET6, &ip6->ip6_dst, dst, sizeof(dst)),
                          m->m_pkthdr.ph_ifidx));
                if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST) {
                              /* ICMPv6 error: MUST deliver it by spec... */
                              code = PRC_NCMDS;
                              /* deliver */
                      } else {
                              /* ICMPv6 informational: MUST not deliver */
                              break;
                      }
      deliver:
                if (icmp6_notify_error(m, off, icmp6len, code)) {
                              /* In this case, m should've been freed. */
                              return (IPPROTO_DONE);
                      }
                      break;
      
      badcode:
                      icmp6stat_inc(icp6s_badcode);
                      break;
      
      badlen:
                      icmp6stat_inc(icp6s_badlen);
                      break;
              }