/*	$OpenBSD: vfs_lookup.c,v 1.84 2021/03/20 11:26:07 semarie Exp $	*/
/*	$NetBSD: vfs_lookup.c,v 1.17 1996/02/09 19:00:59 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
* * @(#)vfs_lookup.c 8.6 (Berkeley) 11/21/94 */ #include <sys/param.h> #include <sys/systm.h> #include <sys/syslimits.h> #include <sys/time.h> #include <sys/namei.h> #include <sys/vnode.h> #include <sys/lock.h> #include <sys/mount.h> #include <sys/errno.h> #include <sys/pool.h> #include <sys/filedesc.h> #include <sys/proc.h> #include <sys/pledge.h> #include <sys/file.h> #include <sys/fcntl.h> #ifdef KTRACE #include <sys/ktrace.h> #endif int component_push(struct componentname *cnp, char *component, size_t len) { if (cnp->cn_rpi + len + 1 >= MAXPATHLEN) return 0; if (cnp->cn_rpi > 1) cnp->cn_rpbuf[cnp->cn_rpi++] = '/'; memcpy(cnp->cn_rpbuf + cnp->cn_rpi, component, len); cnp->cn_rpi+=len; cnp->cn_rpbuf[cnp->cn_rpi] = '\0'; return 1; } void component_pop(struct componentname *cnp) { while(cnp->cn_rpi && cnp->cn_rpbuf[cnp->cn_rpi] != '/' ) cnp->cn_rpi--; if (cnp->cn_rpi == 0 && cnp->cn_rpbuf[0] == '/') cnp->cn_rpi++; cnp->cn_rpbuf[cnp->cn_rpi] = '\0'; } void ndinitat(struct nameidata *ndp, u_long op, u_long flags, enum uio_seg segflg, int dirfd, const char *namep, struct proc *p) { memset(ndp, 0, sizeof(*ndp)); ndp->ni_cnd.cn_nameiop = op; ndp->ni_cnd.cn_flags = flags; ndp->ni_segflg = segflg; ndp->ni_dirfd = dirfd; ndp->ni_dirp = namep; ndp->ni_cnd.cn_proc = p; } /* * Convert a pathname into a pointer to a vnode. * * The FOLLOW flag is set when symbolic links are to be followed * when they occur at the end of the name translation process. * Symbolic links are always followed for all other pathname * components other than the last. * * If the LOCKLEAF flag is set, a locked vnode is returned. * * The segflg defines whether the name is to be copied from user * space or kernel space. * * Overall outline of namei: * * copy in name * get starting directory * while (!done && !error) { * call lookup to search path. * if symbolic link, massage name in buffer and continue * } */ int namei(struct nameidata *ndp) { struct filedesc *fdp; /* pointer to file descriptor state */ char *cp; /* pointer into pathname argument */ struct vnode *dp; /* the directory we are searching */ struct iovec aiov; /* uio for reading symbolic links */ struct uio auio; int error, linklen; struct componentname *cnp = &ndp->ni_cnd; struct proc *p = cnp->cn_proc; ndp->ni_cnd.cn_cred = ndp->ni_cnd.cn_proc->p_ucred; #ifdef DIAGNOSTIC if (!cnp->cn_cred || !cnp->cn_proc) panic ("namei: bad cred/proc"); if (cnp->cn_nameiop & (~OPMASK)) panic ("namei: nameiop contaminated with flags"); if (cnp->cn_flags & OPMASK) panic ("namei: flags contaminated with nameiops"); #endif fdp = cnp->cn_proc->p_fd; /* * Get a buffer for the name to be translated, and copy the * name into the buffer. 
*/ if ((cnp->cn_flags & HASBUF) == 0) cnp->cn_pnbuf = pool_get(&namei_pool, PR_WAITOK); if (ndp->ni_segflg == UIO_SYSSPACE) error = copystr(ndp->ni_dirp, cnp->cn_pnbuf, MAXPATHLEN, &ndp->ni_pathlen); else error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf, MAXPATHLEN, &ndp->ni_pathlen); /* * Fail on null pathnames */ if (error == 0 && ndp->ni_pathlen == 1) error = ENOENT; if (error) goto fail; #ifdef KTRACE if (KTRPOINT(cnp->cn_proc, KTR_NAMEI)) ktrnamei(cnp->cn_proc, cnp->cn_pnbuf); #endif /* * Strip trailing slashes, as requested */ if (cnp->cn_flags & STRIPSLASHES) { char *end = cnp->cn_pnbuf + ndp->ni_pathlen - 2; cp = end; while (cp >= cnp->cn_pnbuf && (*cp == '/')) cp--; /* Still some remaining characters in the buffer */ if (cp >= cnp->cn_pnbuf) { ndp->ni_pathlen -= (end - cp); *(cp + 1) = '\0'; } } ndp->ni_loopcnt = 0; /* * Get starting point for the translation. */ if ((ndp->ni_rootdir = fdp->fd_rdir) == NULL || (ndp->ni_cnd.cn_flags & KERNELPATH)) ndp->ni_rootdir = rootvnode; if (ndp->ni_cnd.cn_flags & KERNELPATH) { ndp->ni_cnd.cn_flags |= BYPASSUNVEIL; } else { error = pledge_namei(p, ndp, cnp->cn_pnbuf); if (error) goto fail; } /* * Check if starting from root directory or current directory. */ if (cnp->cn_pnbuf[0] == '/') { dp = ndp->ni_rootdir; vref(dp); if (cnp->cn_flags & REALPATH && cnp->cn_rpi == 0) { cnp->cn_rpbuf[0] = '/'; cnp->cn_rpbuf[1] = '\0'; cnp->cn_rpi = 1; } } else if (ndp->ni_dirfd == AT_FDCWD) { dp = fdp->fd_cdir; vref(dp); unveil_start_relative(p, ndp, NULL); unveil_check_component(p, ndp, dp); } else { struct file *fp = fd_getfile(fdp, ndp->ni_dirfd); if (fp == NULL) { error = EBADF; goto fail; } dp = (struct vnode *)fp->f_data; if (fp->f_type != DTYPE_VNODE || dp->v_type != VDIR) { FRELE(fp, p); error = ENOTDIR; goto fail; } vref(dp); unveil_start_relative(p, ndp, dp); unveil_check_component(p, ndp, dp); FRELE(fp, p); } for (;;) { if (!dp->v_mount) { /* Give up if the directory is no longer mounted */ vrele(dp); error = ENOENT; goto fail; } cnp->cn_nameptr = cnp->cn_pnbuf; ndp->ni_startdir = dp; if ((error = vfs_lookup(ndp)) != 0) goto fail; /* * If not a symbolic link, return search result. 
*/ if ((cnp->cn_flags & ISSYMLINK) == 0) { if ((error = unveil_check_final(p, ndp))) { if ((cnp->cn_flags & LOCKPARENT) && (cnp->cn_flags & ISLASTCN) && (ndp->ni_vp != ndp->ni_dvp)) vput(ndp->ni_dvp); if (ndp->ni_vp) { if ((cnp->cn_flags & LOCKLEAF)) vput(ndp->ni_vp); else vrele(ndp->ni_vp); } goto fail; } if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0) pool_put(&namei_pool, cnp->cn_pnbuf); else cnp->cn_flags |= HASBUF; return (0); } if ((cnp->cn_flags & LOCKPARENT) && (cnp->cn_flags & ISLASTCN)) VOP_UNLOCK(ndp->ni_dvp); if (ndp->ni_loopcnt++ >= SYMLOOP_MAX) { error = ELOOP; break; } if (ndp->ni_pathlen > 1) cp = pool_get(&namei_pool, PR_WAITOK); else cp = cnp->cn_pnbuf; aiov.iov_base = cp; aiov.iov_len = MAXPATHLEN; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_procp = cnp->cn_proc; auio.uio_resid = MAXPATHLEN; error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred); if (error) { badlink: if (ndp->ni_pathlen > 1) pool_put(&namei_pool, cp); break; } linklen = MAXPATHLEN - auio.uio_resid; if (linklen == 0) { error = ENOENT; goto badlink; } if (linklen + ndp->ni_pathlen >= MAXPATHLEN) { error = ENAMETOOLONG; goto badlink; } if (ndp->ni_pathlen > 1) { memcpy(cp + linklen, ndp->ni_next, ndp->ni_pathlen); pool_put(&namei_pool, cnp->cn_pnbuf); cnp->cn_pnbuf = cp; } else cnp->cn_pnbuf[linklen] = '\0'; ndp->ni_pathlen += linklen; vput(ndp->ni_vp); dp = ndp->ni_dvp; /* * Check if root directory should replace current directory. */ if (cnp->cn_pnbuf[0] == '/') { vrele(dp); dp = ndp->ni_rootdir; vref(dp); ndp->ni_unveil_match = NULL; unveil_check_component(p, ndp, dp); if (cnp->cn_flags & REALPATH) { cnp->cn_rpbuf[0] = '/'; cnp->cn_rpbuf[1] = '\0'; cnp->cn_rpi = 1; } } else if (cnp->cn_flags & REALPATH) { component_pop(cnp); } } vrele(ndp->ni_dvp); vput(ndp->ni_vp); fail: pool_put(&namei_pool, cnp->cn_pnbuf); ndp->ni_vp = NULL; return (error); } /* * Search a pathname. * This is a very central and rather complicated routine. * * The pathname is pointed to by ni_cnd.cn_nameptr and is of length * ni_pathlen. The starting directory is taken from ni_startdir. The * pathname is descended until done, or a symbolic link is encountered. * If the path is completed the flag ISLASTCN is set in ni_cnd.cn_flags. * If a symbolic link need interpretation is encountered, the flag ISSYMLINK * is set in ni_cnd.cn_flags. * * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on * whether the name is to be looked up, created, renamed, or deleted. * When CREATE, RENAME, or DELETE is specified, information usable in * creating, renaming, or deleting a directory entry may be calculated. * If flag has LOCKPARENT or'ed into it, the parent directory is returned * locked. If flag has WANTPARENT or'ed into it, the parent directory is * returned unlocked. Otherwise the parent directory is not returned. If * the target of the pathname exists and LOCKLEAF is or'ed into the flag * the target is returned locked, otherwise it is returned unlocked. * When creating or renaming and LOCKPARENT is specified, the target may not * be ".". When deleting and LOCKPARENT is specified, the target may be ".". * * Overall outline of lookup: * * dirloop: * identify next component of name at ndp->ni_ptr * handle degenerate case where name is null string * if .. 
and crossing mount points and on mounted filesys, find parent * call VOP_LOOKUP routine for next component name * directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set * component vnode returned in ni_vp (if it exists), locked. * if result vnode is mounted on and crossing mount points, * find mounted on vnode * if more components of name, do next level at dirloop * return the answer in ni_vp, locked if LOCKLEAF set * if LOCKPARENT set, return locked parent in ni_dvp * if WANTPARENT set, return unlocked parent in ni_dvp */ int vfs_lookup(struct nameidata *ndp) { char *cp; /* pointer into pathname argument */ struct vnode *dp = 0; /* the directory we are searching */ struct vnode *tdp; /* saved dp */ struct mount *mp; /* mount table entry */ int docache; /* == 0 do not cache last component */ int wantparent; /* 1 => wantparent or lockparent flag */ int rdonly; /* lookup read-only flag bit */ int error = 0; int dpunlocked = 0; /* dp has already been unlocked */ int slashes; struct componentname *cnp = &ndp->ni_cnd; /* * Setup: break out flag bits into variables. */ wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT); docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE; if (cnp->cn_nameiop == DELETE || (wantparent && cnp->cn_nameiop != CREATE)) docache = 0; rdonly = cnp->cn_flags & RDONLY; ndp->ni_dvp = NULL; cnp->cn_flags &= ~ISSYMLINK; dp = ndp->ni_startdir; ndp->ni_startdir = NULLVP; vn_lock(dp, LK_EXCLUSIVE | LK_RETRY); /* * If we have a leading string of slashes, remove them, and just make * sure the current node is a directory. */ cp = cnp->cn_nameptr; if (*cp == '/') { do { cp++; } while (*cp == '/'); ndp->ni_pathlen -= cp - cnp->cn_nameptr; cnp->cn_nameptr = cp; if (dp->v_type != VDIR) { error = ENOTDIR; goto bad; } /* * If we've exhausted the path name, then just return the * current node. If the caller requested the parent node (i.e. * it's a CREATE, DELETE, or RENAME), and we don't have one * (because this is the root directory), then we must fail. */ if (cnp->cn_nameptr[0] == '\0') { if (ndp->ni_dvp == NULL && wantparent) { error = EISDIR; goto bad; } ndp->ni_vp = dp; cnp->cn_flags |= ISLASTCN; goto terminal; } } dirloop: /* * Search a new directory. * * The last component of the filename is left accessible via * cnp->cn_nameptr for callers that need the name. Callers needing * the name set the SAVENAME flag. When done, they assume * responsibility for freeing the pathname buffer. */ cnp->cn_consume = 0; /* XXX: Figure out the length of the last component. */ cp = cnp->cn_nameptr; while (*cp && (*cp != '/')) cp++; cnp->cn_namelen = cp - cnp->cn_nameptr; if (cnp->cn_namelen > NAME_MAX) { error = ENAMETOOLONG; goto bad; } #ifdef NAMEI_DIAGNOSTIC { char c = *cp; *cp = '\0'; printf("{%s}: ", cnp->cn_nameptr); *cp = c; } #endif if (cnp->cn_flags & REALPATH) { size_t len = cp - cnp->cn_nameptr; if (len == 2 && cnp->cn_nameptr[0] == '.' && cnp->cn_nameptr[1] == '.') component_pop(cnp); else if (!(len == 1 && cnp->cn_nameptr[0] == '.')) { if (!component_push(cnp, cnp->cn_nameptr, len)) { error = ENAMETOOLONG; goto bad; } } } ndp->ni_pathlen -= cnp->cn_namelen; ndp->ni_next = cp; /* * If this component is followed by a slash, then move the pointer to * the next component forward, and remember that this component must be * a directory. 
*/ if (*cp == '/') { do { cp++; } while (*cp == '/'); slashes = cp - ndp->ni_next; ndp->ni_pathlen -= slashes; ndp->ni_next = cp; cnp->cn_flags |= REQUIREDIR; } else { slashes = 0; cnp->cn_flags &= ~REQUIREDIR; } /* * We do special processing on the last component, whether or not it's * a directory. Cache all intervening lookups, but not the final one. */ if (*cp == '\0') { if (docache) cnp->cn_flags |= MAKEENTRY; else cnp->cn_flags &= ~MAKEENTRY; cnp->cn_flags |= ISLASTCN; } else { cnp->cn_flags |= MAKEENTRY; cnp->cn_flags &= ~ISLASTCN; } if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.') cnp->cn_flags |= ISDOTDOT; else cnp->cn_flags &= ~ISDOTDOT; /* * Handle "..": two special cases. * 1. If at root directory (e.g. after chroot) * or at absolute root directory * or we are under unveil restrictions * then ignore it so can't get out. * 2. If this vnode is the root of a mounted * filesystem, then replace it with the * vnode which was mounted on so we take the * .. in the other file system. */ if (cnp->cn_flags & ISDOTDOT) { for (;;) { if (dp == ndp->ni_rootdir || dp == rootvnode) { ndp->ni_dvp = dp; ndp->ni_vp = dp; vref(dp); ndp->ni_unveil_match = NULL; goto nextname; } if ((dp->v_flag & VROOT) == 0 || (cnp->cn_flags & NOCROSSMOUNT)) break; tdp = dp; dp = dp->v_mount->mnt_vnodecovered; vput(tdp); vref(dp); unveil_check_component(curproc, ndp, dp); vn_lock(dp, LK_EXCLUSIVE | LK_RETRY); } } /* * We now have a segment name to search for, and a directory to search. */ ndp->ni_dvp = dp; ndp->ni_vp = NULL; cnp->cn_flags &= ~PDIRUNLOCK; unveil_check_component(curproc, ndp, dp); if ((error = VOP_LOOKUP(dp, &ndp->ni_vp, cnp)) != 0) { #ifdef DIAGNOSTIC if (ndp->ni_vp != NULL) panic("leaf should be empty"); #endif #ifdef NAMEI_DIAGNOSTIC printf("not found\n"); #endif /* * Allow for unveiling a file in a directory which we cannot * create ourselves. */ if (ndp->ni_pledge == PLEDGE_UNVEIL && (error == EPERM || error == EACCES || error == EROFS)) error = EJUSTRETURN; if (error != EJUSTRETURN) goto bad; /* * If this was not the last component, or there were trailing * slashes, then the name must exist. */ if (cnp->cn_flags & REQUIREDIR) { error = ENOENT; goto bad; } /* * If creating and at end of pathname, then can consider * allowing file to be created. Check for a read only * filesystem and disallow this unless we are unveil'ing */ if (ndp->ni_pledge != PLEDGE_UNVEIL && (rdonly || (ndp->ni_dvp->v_mount->mnt_flag & MNT_RDONLY))) { error = EROFS; goto bad; } /* * We return with ni_vp NULL to indicate that the entry * doesn't currently exist, leaving a pointer to the * (possibly locked) directory inode in ndp->ni_dvp. */ if (cnp->cn_flags & SAVESTART) { ndp->ni_startdir = ndp->ni_dvp; vref(ndp->ni_startdir); } return (0); } #ifdef NAMEI_DIAGNOSTIC printf("found\n"); #endif /* * Take into account any additional components consumed by the * underlying filesystem. This will include any trailing slashes after * the last component consumed. */ if (cnp->cn_consume > 0) { if (cnp->cn_consume >= slashes) { cnp->cn_flags &= ~REQUIREDIR; } ndp->ni_pathlen -= cnp->cn_consume - slashes; ndp->ni_next += cnp->cn_consume - slashes; cnp->cn_consume = 0; if (ndp->ni_next[0] == '\0') cnp->cn_flags |= ISLASTCN; } dp = ndp->ni_vp; /* * Check to see if the vnode has been mounted on; * if so find the root of the mounted file system. 
*/ while (dp->v_type == VDIR && (mp = dp->v_mountedhere) && (cnp->cn_flags & NOCROSSMOUNT) == 0) { if (vfs_busy(mp, VB_READ|VB_WAIT)) continue; VOP_UNLOCK(dp); error = VFS_ROOT(mp, &tdp); vfs_unbusy(mp); if (error) { dpunlocked = 1; goto bad2; } vrele(dp); ndp->ni_vp = dp = tdp; } /* * Check for symbolic link. Back up over any slashes that we skipped, * as we will need them again. */ if ((dp->v_type == VLNK) && (cnp->cn_flags & (FOLLOW|REQUIREDIR))) { ndp->ni_pathlen += slashes; ndp->ni_next -= slashes; cnp->cn_flags |= ISSYMLINK; return (0); } /* * Check for directory, if the component was followed by a series of * slashes. */ if ((dp->v_type != VDIR) && (cnp->cn_flags & REQUIREDIR)) { error = ENOTDIR; goto bad2; } nextname: /* * Not a symbolic link. If this was not the last component, then * continue at the next component, else return. */ if (!(cnp->cn_flags & ISLASTCN)) { cnp->cn_nameptr = ndp->ni_next; vrele(ndp->ni_dvp); goto dirloop; } terminal: /* * Check for read-only file systems. */ if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME) { /* * Disallow directory write attempts on read-only * file systems. */ if (rdonly || (dp->v_mount->mnt_flag & MNT_RDONLY) || (wantparent && (ndp->ni_dvp->v_mount->mnt_flag & MNT_RDONLY))) { error = EROFS; goto bad2; } } if (ndp->ni_dvp != NULL) { if (cnp->cn_flags & SAVESTART) { ndp->ni_startdir = ndp->ni_dvp; vref(ndp->ni_startdir); } if (!wantparent) vrele(ndp->ni_dvp); } if ((cnp->cn_flags & LOCKLEAF) == 0) VOP_UNLOCK(dp); return (0); bad2: if ((cnp->cn_flags & LOCKPARENT) && (cnp->cn_flags & ISLASTCN) && ((cnp->cn_flags & PDIRUNLOCK) == 0)) VOP_UNLOCK(ndp->ni_dvp); vrele(ndp->ni_dvp); bad: if (dpunlocked) vrele(dp); else vput(dp); ndp->ni_vp = NULL; return (error); } /* * Reacquire a path name component. */ int vfs_relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp) { struct vnode *dp = 0; /* the directory we are searching */ int wantparent; /* 1 => wantparent or lockparent flag */ int rdonly; /* lookup read-only flag bit */ int error = 0; #ifdef NAMEI_DIAGNOSTIC char *cp; /* DEBUG: check name ptr/len */ #endif /* * Setup: break out flag bits into variables. */ wantparent = cnp->cn_flags & (LOCKPARENT|WANTPARENT); rdonly = cnp->cn_flags & RDONLY; cnp->cn_flags &= ~ISSYMLINK; dp = dvp; vn_lock(dp, LK_EXCLUSIVE | LK_RETRY); /* dirloop: */ /* * Search a new directory. * * The last component of the filename is left accessible via * cnp->cn_nameptr for callers that need the name. Callers needing * the name set the SAVENAME flag. When done, they assume * responsibility for freeing the pathname buffer. */ #ifdef NAMEI_DIAGNOSTIC /* XXX: Figure out the length of the last component. */ cp = cnp->cn_nameptr; while (*cp && (*cp != '/')) { cp++; } if (cnp->cn_namelen != cp - cnp->cn_nameptr) panic("relookup: bad len"); if (*cp != 0) panic("relookup: not last component"); printf("{%s}: ", cnp->cn_nameptr); #endif /* * Check for degenerate name (e.g. / or "") * which is a way of talking about a directory, * e.g. like "/." or ".". */ if (cnp->cn_nameptr[0] == '\0') panic("relookup: null name"); if (cnp->cn_flags & ISDOTDOT) panic ("relookup: lookup on dot-dot"); /* * We now have a segment name to search for, and a directory to search. */ if ((error = VOP_LOOKUP(dp, vpp, cnp)) != 0) { #ifdef DIAGNOSTIC if (*vpp != NULL) panic("leaf should be empty"); #endif if (error != EJUSTRETURN) goto bad; /* * If creating and at end of pathname, then can consider * allowing file to be created. 
*/ if (rdonly || (dvp->v_mount->mnt_flag & MNT_RDONLY)) { error = EROFS; goto bad; } /* ASSERT(dvp == ndp->ni_startdir) */ if (cnp->cn_flags & SAVESTART) vref(dvp); /* * We return with ni_vp NULL to indicate that the entry * doesn't currently exist, leaving a pointer to the * (possibly locked) directory inode in ndp->ni_dvp. */ return (0); } dp = *vpp; #ifdef DIAGNOSTIC /* * Check for symbolic link */ if (dp->v_type == VLNK && (cnp->cn_flags & FOLLOW)) panic ("relookup: symlink found."); #endif /* * Check for read-only file systems. */ if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME) { /* * Disallow directory write attempts on read-only * file systems. */ if (rdonly || (dp->v_mount->mnt_flag & MNT_RDONLY) || (wantparent && (dvp->v_mount->mnt_flag & MNT_RDONLY))) { error = EROFS; goto bad2; } } /* ASSERT(dvp == ndp->ni_startdir) */ if (cnp->cn_flags & SAVESTART) vref(dvp); if (!wantparent) vrele(dvp); if ((cnp->cn_flags & LOCKLEAF) == 0) VOP_UNLOCK(dp); return (0); bad2: if ((cnp->cn_flags & LOCKPARENT) && (cnp->cn_flags & ISLASTCN)) VOP_UNLOCK(dvp); vrele(dvp); bad: vput(dp); *vpp = NULL; return (error); }
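/*
 * Illustrative sketch (not part of vfs_lookup.c): how a kernel caller
 * typically drives namei() through the ndinitat()/nameidata interface
 * defined above.  The function name and the "/etc/myfile" path are
 * hypothetical; the flags and cleanup rules follow the comments in
 * namei() and vfs_lookup() -- with LOCKLEAF the leaf vnode comes back
 * locked and referenced.
 */
#include <sys/param.h>
#include <sys/fcntl.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/vnode.h>

int
example_lookup_vnode(struct proc *p, struct vnode **vpp)
{
	struct nameidata nd;
	int error;

	/*
	 * Start from the current working directory (AT_FDCWD), follow a
	 * trailing symlink and return the leaf locked.
	 */
	ndinitat(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
	    AT_FDCWD, "/etc/myfile", p);

	if ((error = namei(&nd)) != 0)
		return (error);

	/* nd.ni_vp is locked and referenced here; hand it back unlocked. */
	*vpp = nd.ni_vp;
	VOP_UNLOCK(nd.ni_vp);
	return (0);
}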
/*	$OpenBSD: subr_tree.c,v 1.10 2018/10/09 08:28:43 dlg Exp $ */

/*
 * Copyright 2002 Niels Provos <provos@citi.umich.edu>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 2016 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/ #include <sys/tree.h> static inline struct rb_entry * rb_n2e(const struct rb_type *t, void *node) { unsigned long addr = (unsigned long)node; return ((struct rb_entry *)(addr + t->t_offset)); } static inline void * rb_e2n(const struct rb_type *t, struct rb_entry *rbe) { unsigned long addr = (unsigned long)rbe; return ((void *)(addr - t->t_offset)); } #define RBE_LEFT(_rbe) (_rbe)->rbt_left #define RBE_RIGHT(_rbe) (_rbe)->rbt_right #define RBE_PARENT(_rbe) (_rbe)->rbt_parent #define RBE_COLOR(_rbe) (_rbe)->rbt_color #define RBH_ROOT(_rbt) (_rbt)->rbt_root static inline void rbe_set(struct rb_entry *rbe, struct rb_entry *parent) { RBE_PARENT(rbe) = parent; RBE_LEFT(rbe) = RBE_RIGHT(rbe) = NULL; RBE_COLOR(rbe) = RB_RED; } static inline void rbe_set_blackred(struct rb_entry *black, struct rb_entry *red) { RBE_COLOR(black) = RB_BLACK; RBE_COLOR(red) = RB_RED; } static inline void rbe_augment(const struct rb_type *t, struct rb_entry *rbe) { (*t->t_augment)(rb_e2n(t, rbe)); } static inline void rbe_if_augment(const struct rb_type *t, struct rb_entry *rbe) { if (t->t_augment != NULL) rbe_augment(t, rbe); } static inline void rbe_rotate_left(const struct rb_type *t, struct rb_tree *rbt, struct rb_entry *rbe) { struct rb_entry *parent; struct rb_entry *tmp; tmp = RBE_RIGHT(rbe); RBE_RIGHT(rbe) = RBE_LEFT(tmp); if (RBE_RIGHT(rbe) != NULL) RBE_PARENT(RBE_LEFT(tmp)) = rbe; parent = RBE_PARENT(rbe); RBE_PARENT(tmp) = parent; if (parent != NULL) { if (rbe == RBE_LEFT(parent)) RBE_LEFT(parent) = tmp; else RBE_RIGHT(parent) = tmp; } else RBH_ROOT(rbt) = tmp; RBE_LEFT(tmp) = rbe; RBE_PARENT(rbe) = tmp; if (t->t_augment != NULL) { rbe_augment(t, rbe); rbe_augment(t, tmp); parent = RBE_PARENT(tmp); if (parent != NULL) rbe_augment(t, parent); } } static inline void rbe_rotate_right(const struct rb_type *t, struct rb_tree *rbt, struct rb_entry *rbe) { struct rb_entry *parent; struct rb_entry *tmp; tmp = RBE_LEFT(rbe); RBE_LEFT(rbe) = RBE_RIGHT(tmp); if (RBE_LEFT(rbe) != NULL) RBE_PARENT(RBE_RIGHT(tmp)) = rbe; parent = RBE_PARENT(rbe); RBE_PARENT(tmp) = parent; if (parent != NULL) { if (rbe == RBE_LEFT(parent)) RBE_LEFT(parent) = tmp; else RBE_RIGHT(parent) = tmp; } else RBH_ROOT(rbt) = tmp; RBE_RIGHT(tmp) = rbe; RBE_PARENT(rbe) = tmp; if (t->t_augment != NULL) { rbe_augment(t, rbe); rbe_augment(t, tmp); parent = RBE_PARENT(tmp); if (parent != NULL) rbe_augment(t, parent); } } static inline void rbe_insert_color(const struct rb_type *t, struct rb_tree *rbt, struct rb_entry *rbe) { struct rb_entry *parent, *gparent, *tmp; while ((parent = RBE_PARENT(rbe)) != NULL && RBE_COLOR(parent) == RB_RED) { gparent = RBE_PARENT(parent); if (parent == RBE_LEFT(gparent)) { tmp = RBE_RIGHT(gparent); if (tmp != NULL && RBE_COLOR(tmp) == RB_RED) { RBE_COLOR(tmp) = RB_BLACK; rbe_set_blackred(parent, gparent); rbe = gparent; continue; } if (RBE_RIGHT(parent) == rbe) { rbe_rotate_left(t, rbt, parent); tmp = parent; parent = rbe; rbe = tmp; } rbe_set_blackred(parent, gparent); rbe_rotate_right(t, rbt, gparent); } else { tmp = RBE_LEFT(gparent); if (tmp != NULL && RBE_COLOR(tmp) == RB_RED) { RBE_COLOR(tmp) = RB_BLACK; rbe_set_blackred(parent, gparent); rbe = gparent; continue; } if (RBE_LEFT(parent) == rbe) { rbe_rotate_right(t, rbt, parent); tmp = parent; parent = rbe; rbe = tmp; } rbe_set_blackred(parent, gparent); rbe_rotate_left(t, rbt, gparent); } } RBE_COLOR(RBH_ROOT(rbt)) = RB_BLACK; } static inline void rbe_remove_color(const struct rb_type *t, struct rb_tree *rbt, struct rb_entry *parent, struct rb_entry *rbe) { struct 
rb_entry *tmp; while ((rbe == NULL || RBE_COLOR(rbe) == RB_BLACK) && rbe != RBH_ROOT(rbt)) { if (RBE_LEFT(parent) == rbe) { tmp = RBE_RIGHT(parent); if (RBE_COLOR(tmp) == RB_RED) { rbe_set_blackred(tmp, parent); rbe_rotate_left(t, rbt, parent); tmp = RBE_RIGHT(parent); } if ((RBE_LEFT(tmp) == NULL || RBE_COLOR(RBE_LEFT(tmp)) == RB_BLACK) && (RBE_RIGHT(tmp) == NULL || RBE_COLOR(RBE_RIGHT(tmp)) == RB_BLACK)) { RBE_COLOR(tmp) = RB_RED; rbe = parent; parent = RBE_PARENT(rbe); } else { if (RBE_RIGHT(tmp) == NULL || RBE_COLOR(RBE_RIGHT(tmp)) == RB_BLACK) { struct rb_entry *oleft; oleft = RBE_LEFT(tmp); if (oleft != NULL) RBE_COLOR(oleft) = RB_BLACK; RBE_COLOR(tmp) = RB_RED; rbe_rotate_right(t, rbt, tmp); tmp = RBE_RIGHT(parent); } RBE_COLOR(tmp) = RBE_COLOR(parent); RBE_COLOR(parent) = RB_BLACK; if (RBE_RIGHT(tmp)) RBE_COLOR(RBE_RIGHT(tmp)) = RB_BLACK; rbe_rotate_left(t, rbt, parent); rbe = RBH_ROOT(rbt); break; } } else { tmp = RBE_LEFT(parent); if (RBE_COLOR(tmp) == RB_RED) { rbe_set_blackred(tmp, parent); rbe_rotate_right(t, rbt, parent); tmp = RBE_LEFT(parent); } if ((RBE_LEFT(tmp) == NULL || RBE_COLOR(RBE_LEFT(tmp)) == RB_BLACK) && (RBE_RIGHT(tmp) == NULL || RBE_COLOR(RBE_RIGHT(tmp)) == RB_BLACK)) { RBE_COLOR(tmp) = RB_RED; rbe = parent; parent = RBE_PARENT(rbe); } else { if (RBE_LEFT(tmp) == NULL || RBE_COLOR(RBE_LEFT(tmp)) == RB_BLACK) { struct rb_entry *oright; oright = RBE_RIGHT(tmp); if (oright != NULL) RBE_COLOR(oright) = RB_BLACK; RBE_COLOR(tmp) = RB_RED; rbe_rotate_left(t, rbt, tmp); tmp = RBE_LEFT(parent); } RBE_COLOR(tmp) = RBE_COLOR(parent); RBE_COLOR(parent) = RB_BLACK; if (RBE_LEFT(tmp) != NULL) RBE_COLOR(RBE_LEFT(tmp)) = RB_BLACK; rbe_rotate_right(t, rbt, parent); rbe = RBH_ROOT(rbt); break; } } } if (rbe != NULL) RBE_COLOR(rbe) = RB_BLACK; } static inline struct rb_entry * rbe_remove(const struct rb_type *t, struct rb_tree *rbt, struct rb_entry *rbe) { struct rb_entry *child, *parent, *old = rbe; unsigned int color; if (RBE_LEFT(rbe) == NULL) child = RBE_RIGHT(rbe); else if (RBE_RIGHT(rbe) == NULL) child = RBE_LEFT(rbe); else { struct rb_entry *tmp; rbe = RBE_RIGHT(rbe); while ((tmp = RBE_LEFT(rbe)) != NULL) rbe = tmp; child = RBE_RIGHT(rbe); parent = RBE_PARENT(rbe); color = RBE_COLOR(rbe); if (child != NULL) RBE_PARENT(child) = parent; if (parent != NULL) { if (RBE_LEFT(parent) == rbe) RBE_LEFT(parent) = child; else RBE_RIGHT(parent) = child; rbe_if_augment(t, parent); } else RBH_ROOT(rbt) = child; if (RBE_PARENT(rbe) == old) parent = rbe; *rbe = *old; tmp = RBE_PARENT(old); if (tmp != NULL) { if (RBE_LEFT(tmp) == old) RBE_LEFT(tmp) = rbe; else RBE_RIGHT(tmp) = rbe; rbe_if_augment(t, tmp); } else RBH_ROOT(rbt) = rbe; RBE_PARENT(RBE_LEFT(old)) = rbe; if (RBE_RIGHT(old)) RBE_PARENT(RBE_RIGHT(old)) = rbe; if (t->t_augment != NULL && parent != NULL) { tmp = parent; do { rbe_augment(t, tmp); tmp = RBE_PARENT(tmp); } while (tmp != NULL); } goto color; } parent = RBE_PARENT(rbe); color = RBE_COLOR(rbe); if (child != NULL) RBE_PARENT(child) = parent; if (parent != NULL) { if (RBE_LEFT(parent) == rbe) RBE_LEFT(parent) = child; else RBE_RIGHT(parent) = child; rbe_if_augment(t, parent); } else RBH_ROOT(rbt) = child; color: if (color == RB_BLACK) rbe_remove_color(t, rbt, parent, child); return (old); } void * _rb_remove(const struct rb_type *t, struct rb_tree *rbt, void *elm) { struct rb_entry *rbe = rb_n2e(t, elm); struct rb_entry *old; old = rbe_remove(t, rbt, rbe); return (old == NULL ? 
NULL : rb_e2n(t, old)); } void * _rb_insert(const struct rb_type *t, struct rb_tree *rbt, void *elm) { struct rb_entry *rbe = rb_n2e(t, elm); struct rb_entry *tmp; struct rb_entry *parent = NULL; void *node; int comp = 0; tmp = RBH_ROOT(rbt); while (tmp != NULL) { parent = tmp; node = rb_e2n(t, tmp); comp = (*t->t_compare)(elm, node); if (comp < 0) tmp = RBE_LEFT(tmp); else if (comp > 0) tmp = RBE_RIGHT(tmp); else return (node); } rbe_set(rbe, parent); if (parent != NULL) { if (comp < 0) RBE_LEFT(parent) = rbe; else RBE_RIGHT(parent) = rbe; rbe_if_augment(t, parent); } else RBH_ROOT(rbt) = rbe; rbe_insert_color(t, rbt, rbe); return (NULL); } /* Finds the node with the same key as elm */ void * _rb_find(const struct rb_type *t, struct rb_tree *rbt, const void *key) { struct rb_entry *tmp = RBH_ROOT(rbt); void *node; int comp; while (tmp != NULL) { node = rb_e2n(t, tmp); comp = (*t->t_compare)(key, node); if (comp < 0) tmp = RBE_LEFT(tmp); else if (comp > 0) tmp = RBE_RIGHT(tmp); else return (node); } return (NULL); } /* Finds the first node greater than or equal to the search key */ void * _rb_nfind(const struct rb_type *t, struct rb_tree *rbt, const void *key) { struct rb_entry *tmp = RBH_ROOT(rbt); void *node; void *res = NULL; int comp; while (tmp != NULL) { node = rb_e2n(t, tmp); comp = (*t->t_compare)(key, node); if (comp < 0) { res = node; tmp = RBE_LEFT(tmp); } else if (comp > 0) tmp = RBE_RIGHT(tmp); else return (node); } return (res); } void * _rb_next(const struct rb_type *t, void *elm) { struct rb_entry *rbe = rb_n2e(t, elm); if (RBE_RIGHT(rbe) != NULL) { rbe = RBE_RIGHT(rbe); while (RBE_LEFT(rbe) != NULL) rbe = RBE_LEFT(rbe); } else { if (RBE_PARENT(rbe) && (rbe == RBE_LEFT(RBE_PARENT(rbe)))) rbe = RBE_PARENT(rbe); else { while (RBE_PARENT(rbe) && (rbe == RBE_RIGHT(RBE_PARENT(rbe)))) rbe = RBE_PARENT(rbe); rbe = RBE_PARENT(rbe); } } return (rbe == NULL ? NULL : rb_e2n(t, rbe)); } void * _rb_prev(const struct rb_type *t, void *elm) { struct rb_entry *rbe = rb_n2e(t, elm); if (RBE_LEFT(rbe)) { rbe = RBE_LEFT(rbe); while (RBE_RIGHT(rbe)) rbe = RBE_RIGHT(rbe); } else { if (RBE_PARENT(rbe) && (rbe == RBE_RIGHT(RBE_PARENT(rbe)))) rbe = RBE_PARENT(rbe); else { while (RBE_PARENT(rbe) && (rbe == RBE_LEFT(RBE_PARENT(rbe)))) rbe = RBE_PARENT(rbe); rbe = RBE_PARENT(rbe); } } return (rbe == NULL ? NULL : rb_e2n(t, rbe)); } void * _rb_root(const struct rb_type *t, struct rb_tree *rbt) { struct rb_entry *rbe = RBH_ROOT(rbt); return (rbe == NULL ? rbe : rb_e2n(t, rbe)); } void * _rb_min(const struct rb_type *t, struct rb_tree *rbt) { struct rb_entry *rbe = RBH_ROOT(rbt); struct rb_entry *parent = NULL; while (rbe != NULL) { parent = rbe; rbe = RBE_LEFT(rbe); } return (parent == NULL ? NULL : rb_e2n(t, parent)); } void * _rb_max(const struct rb_type *t, struct rb_tree *rbt) { struct rb_entry *rbe = RBH_ROOT(rbt); struct rb_entry *parent = NULL; while (rbe != NULL) { parent = rbe; rbe = RBE_RIGHT(rbe); } return (parent == NULL ? NULL : rb_e2n(t, parent)); } void * _rb_left(const struct rb_type *t, void *node) { struct rb_entry *rbe = rb_n2e(t, node); rbe = RBE_LEFT(rbe); return (rbe == NULL ? NULL : rb_e2n(t, rbe)); } void * _rb_right(const struct rb_type *t, void *node) { struct rb_entry *rbe = rb_n2e(t, node); rbe = RBE_RIGHT(rbe); return (rbe == NULL ? NULL : rb_e2n(t, rbe)); } void * _rb_parent(const struct rb_type *t, void *node) { struct rb_entry *rbe = rb_n2e(t, node); rbe = RBE_PARENT(rbe); return (rbe == NULL ? 
NULL : rb_e2n(t, rbe)); } void _rb_set_left(const struct rb_type *t, void *node, void *left) { struct rb_entry *rbe = rb_n2e(t, node); struct rb_entry *rbl = (left == NULL) ? NULL : rb_n2e(t, left); RBE_LEFT(rbe) = rbl; } void _rb_set_right(const struct rb_type *t, void *node, void *right) { struct rb_entry *rbe = rb_n2e(t, node); struct rb_entry *rbr = (right == NULL) ? NULL : rb_n2e(t, right); RBE_RIGHT(rbe) = rbr; } void _rb_set_parent(const struct rb_type *t, void *node, void *parent) { struct rb_entry *rbe = rb_n2e(t, node); struct rb_entry *rbp = (parent == NULL) ? NULL : rb_n2e(t, parent); RBE_PARENT(rbe) = rbp; } void _rb_poison(const struct rb_type *t, void *node, unsigned long poison) { struct rb_entry *rbe = rb_n2e(t, node); RBE_PARENT(rbe) = RBE_LEFT(rbe) = RBE_RIGHT(rbe) = (struct rb_entry *)poison; } int _rb_check(const struct rb_type *t, void *node, unsigned long poison) { struct rb_entry *rbe = rb_n2e(t, node); return ((unsigned long)RBE_PARENT(rbe) == poison && (unsigned long)RBE_LEFT(rbe) == poison && (unsigned long)RBE_RIGHT(rbe) == poison); }
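/*
 * Illustrative sketch (not part of subr_tree.c): the _rb_* routines above
 * are the runtime behind the RBT_* macro layer in <sys/tree.h>.  A consumer
 * declares a tree roughly as below; "struct demo_node", its key field and
 * demo_cmp() are hypothetical names used only for illustration, and the
 * exact macro spellings are assumed from <sys/tree.h>.
 */
#include <sys/tree.h>

struct demo_node {
	RBT_ENTRY(demo_node)	entry;		/* becomes a struct rb_entry */
	int			key;
};

static inline int
demo_cmp(const struct demo_node *a, const struct demo_node *b)
{
	/* Total order on the key; equality means "same node" to _rb_insert(). */
	return (a->key < b->key ? -1 : a->key > b->key);
}

RBT_HEAD(demo_tree, demo_node);
RBT_PROTOTYPE(demo_tree, demo_node, entry, demo_cmp);
RBT_GENERATE(demo_tree, demo_node, entry, demo_cmp);

/*
 * Typical use: RBT_INIT(demo_tree, &head), RBT_INSERT(demo_tree, &head, n),
 * RBT_FIND(demo_tree, &head, &key_node) and RBT_REMOVE(demo_tree, &head, n)
 * all end up in _rb_insert(), _rb_find() and _rb_remove() above.
 */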
/*	$OpenBSD: kern_lock.c,v 1.71 2020/03/05 09:28:31 claudio Exp $	*/

/*
 * Copyright (c) 2017 Visa Hankala
 * Copyright (c) 2014 David Gwynne <dlg@openbsd.org>
 * Copyright (c) 2004 Artur Grabowski <art@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sched.h>
#include <sys/atomic.h>
#include <sys/witness.h>
#include <sys/mutex.h>

#include <ddb/db_output.h>

#ifdef MP_LOCKDEBUG
#ifndef DDB
#error "MP_LOCKDEBUG requires DDB"
#endif

/* CPU-dependent timing, this needs to be settable from ddb. */
int __mp_lock_spinout = 200000000;
#endif /* MP_LOCKDEBUG */

#ifdef MULTIPROCESSOR

#include <sys/mplock.h>
struct __mp_lock kernel_lock;

/*
 * Functions for manipulating the kernel_lock.  We put them here
 * so that they show up in profiles.
 */

void
_kernel_lock_init(void)
{
	__mp_lock_init(&kernel_lock);
}

/*
 * Acquire/release the kernel lock.  Intended for use in the scheduler
 * and the lower half of the kernel.
*/ void _kernel_lock(void) { SCHED_ASSERT_UNLOCKED(); __mp_lock(&kernel_lock); } void _kernel_unlock(void) { __mp_unlock(&kernel_lock); } int _kernel_lock_held(void) { if (panicstr || db_active) return 1; return (__mp_lock_held(&kernel_lock, curcpu())); } #ifdef __USE_MI_MPLOCK /* Ticket lock implementation */ #include <machine/cpu.h> void ___mp_lock_init(struct __mp_lock *mpl, const struct lock_type *type) { memset(mpl->mpl_cpus, 0, sizeof(mpl->mpl_cpus)); mpl->mpl_users = 0; mpl->mpl_ticket = 1; #ifdef WITNESS mpl->mpl_lock_obj.lo_name = type->lt_name; mpl->mpl_lock_obj.lo_type = type; if (mpl == &kernel_lock) mpl->mpl_lock_obj.lo_flags = LO_WITNESS | LO_INITIALIZED | LO_SLEEPABLE | (LO_CLASS_KERNEL_LOCK << LO_CLASSSHIFT); else if (mpl == &sched_lock) mpl->mpl_lock_obj.lo_flags = LO_WITNESS | LO_INITIALIZED | LO_RECURSABLE | (LO_CLASS_SCHED_LOCK << LO_CLASSSHIFT); WITNESS_INIT(&mpl->mpl_lock_obj, type); #endif } static __inline void __mp_lock_spin(struct __mp_lock *mpl, u_int me) { struct schedstate_percpu *spc = &curcpu()->ci_schedstate; #ifdef MP_LOCKDEBUG int nticks = __mp_lock_spinout; #endif spc->spc_spinning++; while (mpl->mpl_ticket != me) { CPU_BUSY_CYCLE(); #ifdef MP_LOCKDEBUG if (--nticks <= 0) { db_printf("%s: %p lock spun out\n", __func__, mpl); db_enter(); nticks = __mp_lock_spinout; } #endif } spc->spc_spinning--; } void __mp_lock(struct __mp_lock *mpl) { struct __mp_lock_cpu *cpu = &mpl->mpl_cpus[cpu_number()]; unsigned long s; #ifdef WITNESS if (!__mp_lock_held(mpl, curcpu())) WITNESS_CHECKORDER(&mpl->mpl_lock_obj, LOP_EXCLUSIVE | LOP_NEWORDER, NULL); #endif s = intr_disable(); if (cpu->mplc_depth++ == 0) cpu->mplc_ticket = atomic_inc_int_nv(&mpl->mpl_users); intr_restore(s); __mp_lock_spin(mpl, cpu->mplc_ticket); membar_enter_after_atomic(); WITNESS_LOCK(&mpl->mpl_lock_obj, LOP_EXCLUSIVE); } void __mp_unlock(struct __mp_lock *mpl) { struct __mp_lock_cpu *cpu = &mpl->mpl_cpus[cpu_number()]; unsigned long s; #ifdef MP_LOCKDEBUG if (!__mp_lock_held(mpl, curcpu())) { db_printf("__mp_unlock(%p): not held lock\n", mpl); db_enter(); } #endif WITNESS_UNLOCK(&mpl->mpl_lock_obj, LOP_EXCLUSIVE); s = intr_disable(); if (--cpu->mplc_depth == 0) { membar_exit(); mpl->mpl_ticket++; } intr_restore(s); } int __mp_release_all(struct __mp_lock *mpl) { struct __mp_lock_cpu *cpu = &mpl->mpl_cpus[cpu_number()]; unsigned long s; int rv; #ifdef WITNESS int i; #endif s = intr_disable(); rv = cpu->mplc_depth; #ifdef WITNESS for (i = 0; i < rv; i++) WITNESS_UNLOCK(&mpl->mpl_lock_obj, LOP_EXCLUSIVE); #endif cpu->mplc_depth = 0; membar_exit(); mpl->mpl_ticket++; intr_restore(s); return (rv); } int __mp_release_all_but_one(struct __mp_lock *mpl) { struct __mp_lock_cpu *cpu = &mpl->mpl_cpus[cpu_number()]; int rv = cpu->mplc_depth - 1; #ifdef WITNESS int i; for (i = 0; i < rv; i++) WITNESS_UNLOCK(&mpl->mpl_lock_obj, LOP_EXCLUSIVE); #endif #ifdef MP_LOCKDEBUG if (!__mp_lock_held(mpl, curcpu())) { db_printf("__mp_release_all_but_one(%p): not held lock\n", mpl); db_enter(); } #endif cpu->mplc_depth = 1; return (rv); } void __mp_acquire_count(struct __mp_lock *mpl, int count) { while (count--) __mp_lock(mpl); } int __mp_lock_held(struct __mp_lock *mpl, struct cpu_info *ci) { struct __mp_lock_cpu *cpu = &mpl->mpl_cpus[CPU_INFO_UNIT(ci)]; return (cpu->mplc_ticket == mpl->mpl_ticket && cpu->mplc_depth > 0); } #endif /* __USE_MI_MPLOCK */ #endif /* MULTIPROCESSOR */ #ifdef __USE_MI_MUTEX void __mtx_init(struct mutex *mtx, int wantipl) { mtx->mtx_owner = NULL; mtx->mtx_wantipl = wantipl; mtx->mtx_oldipl = 
IPL_NONE; } #ifdef MULTIPROCESSOR void mtx_enter(struct mutex *mtx) { struct schedstate_percpu *spc = &curcpu()->ci_schedstate; #ifdef MP_LOCKDEBUG int nticks = __mp_lock_spinout; #endif WITNESS_CHECKORDER(MUTEX_LOCK_OBJECT(mtx), LOP_EXCLUSIVE | LOP_NEWORDER, NULL); spc->spc_spinning++; while (mtx_enter_try(mtx) == 0) { CPU_BUSY_CYCLE(); #ifdef MP_LOCKDEBUG if (--nticks == 0) { db_printf("%s: %p lock spun out\n", __func__, mtx); db_enter(); nticks = __mp_lock_spinout; } #endif } spc->spc_spinning--; } int mtx_enter_try(struct mutex *mtx) { struct cpu_info *owner, *ci = curcpu(); int s; /* Avoid deadlocks after panic or in DDB */ if (panicstr || db_active) return (1); if (mtx->mtx_wantipl != IPL_NONE) s = splraise(mtx->mtx_wantipl); owner = atomic_cas_ptr(&mtx->mtx_owner, NULL, ci); #ifdef DIAGNOSTIC if (__predict_false(owner == ci)) panic("mtx %p: locking against myself", mtx); #endif if (owner == NULL) { membar_enter_after_atomic(); if (mtx->mtx_wantipl != IPL_NONE) mtx->mtx_oldipl = s; #ifdef DIAGNOSTIC ci->ci_mutex_level++; #endif WITNESS_LOCK(MUTEX_LOCK_OBJECT(mtx), LOP_EXCLUSIVE); return (1); } if (mtx->mtx_wantipl != IPL_NONE) splx(s); return (0); } #else void mtx_enter(struct mutex *mtx) { struct cpu_info *ci = curcpu(); /* Avoid deadlocks after panic or in DDB */ if (panicstr || db_active) return; WITNESS_CHECKORDER(MUTEX_LOCK_OBJECT(mtx), LOP_EXCLUSIVE | LOP_NEWORDER, NULL); #ifdef DIAGNOSTIC if (__predict_false(mtx->mtx_owner == ci)) panic("mtx %p: locking against myself", mtx); #endif if (mtx->mtx_wantipl != IPL_NONE) mtx->mtx_oldipl = splraise(mtx->mtx_wantipl); mtx->mtx_owner = ci; #ifdef DIAGNOSTIC ci->ci_mutex_level++; #endif WITNESS_LOCK(MUTEX_LOCK_OBJECT(mtx), LOP_EXCLUSIVE); } int mtx_enter_try(struct mutex *mtx) { mtx_enter(mtx); return (1); } #endif void mtx_leave(struct mutex *mtx) { int s; /* Avoid deadlocks after panic or in DDB */ if (panicstr || db_active) return; MUTEX_ASSERT_LOCKED(mtx); WITNESS_UNLOCK(MUTEX_LOCK_OBJECT(mtx), LOP_EXCLUSIVE); #ifdef DIAGNOSTIC curcpu()->ci_mutex_level--; #endif s = mtx->mtx_oldipl; #ifdef MULTIPROCESSOR membar_exit(); #endif mtx->mtx_owner = NULL; if (mtx->mtx_wantipl != IPL_NONE) splx(s); } #ifdef DDB void db_mtx_enter(struct db_mutex *mtx) { struct cpu_info *ci = curcpu(), *owner; unsigned long s; #ifdef DIAGNOSTIC if (__predict_false(mtx->mtx_owner == ci)) panic("%s: mtx %p: locking against myself", __func__, mtx); #endif s = intr_disable(); for (;;) { owner = atomic_cas_ptr(&mtx->mtx_owner, NULL, ci); if (owner == NULL) break; CPU_BUSY_CYCLE(); } membar_enter_after_atomic(); mtx->mtx_intr_state = s; #ifdef DIAGNOSTIC ci->ci_mutex_level++; #endif } void db_mtx_leave(struct db_mutex *mtx) { #ifdef DIAGNOSTIC struct cpu_info *ci = curcpu(); #endif unsigned long s; #ifdef DIAGNOSTIC if (__predict_false(mtx->mtx_owner != ci)) panic("%s: mtx %p: not owned by this CPU", __func__, mtx); ci->ci_mutex_level--; #endif s = mtx->mtx_intr_state; #ifdef MULTIPROCESSOR membar_exit(); #endif mtx->mtx_owner = NULL; intr_restore(s); } #endif /* DDB */ #endif /* __USE_MI_MUTEX */ #ifdef WITNESS void _mtx_init_flags(struct mutex *m, int ipl, const char *name, int flags, const struct lock_type *type) { struct lock_object *lo = MUTEX_LOCK_OBJECT(m); lo->lo_flags = MTX_LO_FLAGS(flags); if (name != NULL) lo->lo_name = name; else lo->lo_name = type->lt_name; WITNESS_INIT(lo, type); _mtx_init(m, ipl); } #endif /* WITNESS */
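/*
 * Illustrative sketch (not part of kern_lock.c): typical use of the mutex
 * primitives implemented above.  The counter structure and its IPL choice
 * are hypothetical; the pattern is mtx_init() once, then mtx_enter()/
 * mtx_leave() around every access so the configured IPL is raised while
 * the lock is held.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mutex.h>

struct demo_counter {
	struct mutex	dc_mtx;
	u_int		dc_value;
};

void
demo_counter_init(struct demo_counter *dc)
{
	/* IPL_NONE: the counter is only touched from process context. */
	mtx_init(&dc->dc_mtx, IPL_NONE);
	dc->dc_value = 0;
}

u_int
demo_counter_bump(struct demo_counter *dc)
{
	u_int v;

	mtx_enter(&dc->dc_mtx);		/* spins via mtx_enter_try() on MP */
	v = ++dc->dc_value;
	mtx_leave(&dc->dc_mtx);		/* drops the IPL back via splx() */

	return (v);
}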
/*	$OpenBSD: gpio.c,v 1.15 2017/08/18 12:15:35 jsg Exp $	*/

/*
 * Copyright (c) 2008 Marc Balmer <mbalmer@openbsd.org>
 * Copyright (c) 2004, 2006 Alexander Yurchenko <grange@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * General Purpose Input/Output framework.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/device.h>
#include <sys/fcntl.h>
#include <sys/ioctl.h>
#include <sys/gpio.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/queue.h>

#include <dev/gpio/gpiovar.h>

struct gpio_softc {
	struct device sc_dev;

	gpio_chipset_tag_t	 sc_gc;		/* GPIO controller */
	gpio_pin_t		*sc_pins;	/* pins array */
	int			 sc_npins;	/* number of pins */

	int sc_opened;
	LIST_HEAD(, gpio_dev)	 sc_devs;	/* devices */
	LIST_HEAD(, gpio_name)	 sc_names;	/* named pins */
};

int	gpio_match(struct device *, void *, void *);
int	gpio_submatch(struct device *, void *, void *);
void	gpio_attach(struct device *, struct device *, void *);
int	gpio_detach(struct device *, int);
int	gpio_search(struct device *, void *, void *);
int	gpio_print(void *, const char *);
int	gpio_pinbyname(struct gpio_softc *, char *gp_name);

struct cfattach gpio_ca = {
	sizeof (struct gpio_softc),
	gpio_match,
	gpio_attach,
	gpio_detach
};

struct cfdriver gpio_cd = {
	NULL, "gpio", DV_DULL
};

int
gpio_match(struct device *parent, void *match, void *aux)
{
	struct cfdata *cf = match;
	struct gpiobus_attach_args *gba = aux;

	return (strcmp(gba->gba_name, cf->cf_driver->cd_name) == 0);
}

int
gpio_submatch(struct device *parent, void *match, void *aux)
{
	struct cfdata *cf = match;
	struct gpio_attach_args *ga = aux;

	if (strcmp(ga->ga_dvname, cf->cf_driver->cd_name) != 0)
		return (0);

	return ((*cf->cf_attach->ca_match)(parent, match, aux));
}

void
gpio_attach(struct device *parent, struct device *self, void *aux)
{
	struct gpio_softc *sc = (struct gpio_softc *)self;
	struct gpiobus_attach_args *gba = aux;

	sc->sc_gc = gba->gba_gc;
	sc->sc_pins = gba->gba_pins;
	sc->sc_npins = gba->gba_npins;

	printf(": %d pins\n", sc->sc_npins);

	/*
	 * Attach all devices that can be connected to the GPIO pins
	 * described in the kernel configuration file.
*/ config_search(gpio_search, self, sc); } int gpio_detach(struct device *self, int flags) { int maj, mn; /* Locate the major number */ for (maj = 0; maj < nchrdev; maj++) if (cdevsw[maj].d_open == gpioopen) break; /* Nuke the vnodes for any open instances (calls close) */ mn = self->dv_unit; vdevgone(maj, mn, mn, VCHR); return (0); } int gpio_search(struct device *parent, void *arg, void *aux) { struct cfdata *cf = arg; struct gpio_attach_args ga; ga.ga_gpio = aux; ga.ga_offset = cf->cf_loc[0]; ga.ga_mask = cf->cf_loc[1]; ga.ga_flags = cf->cf_loc[2]; if (cf->cf_attach->ca_match(parent, cf, &ga) > 0) config_attach(parent, cf, &ga, gpio_print); return (0); } int gpio_print(void *aux, const char *pnp) { struct gpio_attach_args *ga = aux; int i; printf(" pins"); for (i = 0; i < 32; i++) if (ga->ga_mask & (1 << i)) printf(" %d", ga->ga_offset + i); return (UNCONF); } int gpiobus_print(void *aux, const char *pnp) { struct gpiobus_attach_args *gba = aux; if (pnp != NULL) printf("%s at %s", gba->gba_name, pnp); return (UNCONF); } int gpio_pin_map(void *gpio, int offset, u_int32_t mask, struct gpio_pinmap *map) { struct gpio_softc *sc = gpio; int npins, pin, i; npins = gpio_npins(mask); if (npins > sc->sc_npins) return (1); for (npins = 0, i = 0; i < 32; i++) if (mask & (1 << i)) { pin = offset + i; if (pin < 0 || pin >= sc->sc_npins) return (1); if (sc->sc_pins[pin].pin_mapped) return (1); sc->sc_pins[pin].pin_mapped = 1; map->pm_map[npins++] = pin; } map->pm_size = npins; return (0); } void gpio_pin_unmap(void *gpio, struct gpio_pinmap *map) { struct gpio_softc *sc = gpio; int pin, i; for (i = 0; i < map->pm_size; i++) { pin = map->pm_map[i]; sc->sc_pins[pin].pin_mapped = 0; } } int gpio_pin_read(void *gpio, struct gpio_pinmap *map, int pin) { struct gpio_softc *sc = gpio; return (gpiobus_pin_read(sc->sc_gc, map->pm_map[pin])); } void gpio_pin_write(void *gpio, struct gpio_pinmap *map, int pin, int value) { struct gpio_softc *sc = gpio; return (gpiobus_pin_write(sc->sc_gc, map->pm_map[pin], value)); } void gpio_pin_ctl(void *gpio, struct gpio_pinmap *map, int pin, int flags) { struct gpio_softc *sc = gpio; return (gpiobus_pin_ctl(sc->sc_gc, map->pm_map[pin], flags)); } int gpio_pin_caps(void *gpio, struct gpio_pinmap *map, int pin) { struct gpio_softc *sc = gpio; return (sc->sc_pins[map->pm_map[pin]].pin_caps); } int gpio_npins(u_int32_t mask) { int npins, i; for (npins = 0, i = 0; i < 32; i++) if (mask & (1 << i)) npins++; return (npins); } int gpioopen(dev_t dev, int flag, int mode, struct proc *p) { struct gpio_softc *sc; sc = (struct gpio_softc *)device_lookup(&gpio_cd, minor(dev)); if (sc == NULL) return (ENXIO); if (sc->sc_opened) return (EBUSY); sc->sc_opened = 1; return (0); } int gpioclose(dev_t dev, int flag, int mode, struct proc *p) { struct gpio_softc *sc; sc = (struct gpio_softc *)device_lookup(&gpio_cd, minor(dev)); if (sc == NULL) return (ENXIO); sc->sc_opened = 0; return (0); } int gpio_pinbyname(struct gpio_softc *sc, char *gp_name) { struct gpio_name *nm; LIST_FOREACH(nm, &sc->sc_names, gp_next) if (!strcmp(nm->gp_name, gp_name)) return (nm->gp_pin); return (-1); } int gpioioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p) { struct gpio_softc *sc; gpio_chipset_tag_t gc; struct gpio_info *info; struct gpio_pin_op *op; struct gpio_attach *attach; struct gpio_attach_args ga; struct gpio_dev *gdev; struct gpio_name *nm; struct gpio_pin_set *set; struct device *dv; int pin, value, flags, npins, found; sc = (struct gpio_softc *)device_lookup(&gpio_cd, minor(dev)); 
if (sc == NULL) return (ENXIO); gc = sc->sc_gc; switch (cmd) { case GPIOINFO: info = (struct gpio_info *)data; if (securelevel < 1) info->gpio_npins = sc->sc_npins; else { for (pin = npins = 0; pin < sc->sc_npins; pin++) if (sc->sc_pins[pin].pin_flags & GPIO_PIN_SET) ++npins; info->gpio_npins = npins; } break; case GPIOPINREAD: op = (struct gpio_pin_op *)data; if (op->gp_name[0] != '\0') { pin = gpio_pinbyname(sc, op->gp_name); if (pin == -1) return (EINVAL); } else pin = op->gp_pin; if (pin < 0 || pin >= sc->sc_npins) return (EINVAL); if (!(sc->sc_pins[pin].pin_flags & GPIO_PIN_SET) && securelevel > 0) return (EPERM); /* return read value */ op->gp_value = gpiobus_pin_read(gc, pin); break; case GPIOPINWRITE: if ((flag & FWRITE) == 0) return (EBADF); op = (struct gpio_pin_op *)data; if (op->gp_name[0] != '\0') { pin = gpio_pinbyname(sc, op->gp_name); if (pin == -1) return (EINVAL); } else pin = op->gp_pin; if (pin < 0 || pin >= sc->sc_npins) return (EINVAL); if (sc->sc_pins[pin].pin_mapped) return (EBUSY); if (!(sc->sc_pins[pin].pin_flags & GPIO_PIN_SET) && securelevel > 0) return (EPERM); value = op->gp_value; if (value != GPIO_PIN_LOW && value != GPIO_PIN_HIGH) return (EINVAL); gpiobus_pin_write(gc, pin, value); /* return old value */ op->gp_value = sc->sc_pins[pin].pin_state; /* update current value */ sc->sc_pins[pin].pin_state = value; break; case GPIOPINTOGGLE: if ((flag & FWRITE) == 0) return (EBADF); op = (struct gpio_pin_op *)data; if (op->gp_name[0] != '\0') { pin = gpio_pinbyname(sc, op->gp_name); if (pin == -1) return (EINVAL); } else pin = op->gp_pin; if (pin < 0 || pin >= sc->sc_npins) return (EINVAL); if (sc->sc_pins[pin].pin_mapped) return (EBUSY); if (!(sc->sc_pins[pin].pin_flags & GPIO_PIN_SET) && securelevel > 0) return (EPERM); value = (sc->sc_pins[pin].pin_state == GPIO_PIN_LOW ? GPIO_PIN_HIGH : GPIO_PIN_LOW); gpiobus_pin_write(gc, pin, value); /* return old value */ op->gp_value = sc->sc_pins[pin].pin_state; /* update current value */ sc->sc_pins[pin].pin_state = value; break; case GPIOATTACH: if (securelevel > 0) return (EPERM); attach = (struct gpio_attach *)data; bzero(&ga, sizeof(ga)); ga.ga_gpio = sc; ga.ga_dvname = attach->ga_dvname; ga.ga_offset = attach->ga_offset; ga.ga_mask = attach->ga_mask; ga.ga_flags = attach->ga_flags; dv = config_found_sm((struct device *)sc, &ga, gpiobus_print, gpio_submatch); if (dv != NULL) { gdev = malloc(sizeof(*gdev), M_DEVBUF, M_WAITOK); gdev->sc_dev = dv; LIST_INSERT_HEAD(&sc->sc_devs, gdev, sc_next); } break; case GPIODETACH: if (securelevel > 0) return (EPERM); attach = (struct gpio_attach *)data; LIST_FOREACH(gdev, &sc->sc_devs, sc_next) { if (strcmp(gdev->sc_dev->dv_xname, attach->ga_dvname) == 0) { if (config_detach(gdev->sc_dev, 0) == 0) { LIST_REMOVE(gdev, sc_next); free(gdev, M_DEVBUF, sizeof(*gdev)); } break; } } break; case GPIOPINSET: if (securelevel > 0) return (EPERM); set = (struct gpio_pin_set *)data; if (set->gp_name[0] != '\0') { pin = gpio_pinbyname(sc, set->gp_name); if (pin == -1) return (EINVAL); } else pin = set->gp_pin; if (pin < 0 || pin >= sc->sc_npins) return (EINVAL); flags = set->gp_flags; /* check that the controller supports all requested flags */ if ((flags & sc->sc_pins[pin].pin_caps) != flags) return (ENODEV); flags = set->gp_flags | GPIO_PIN_SET; set->gp_caps = sc->sc_pins[pin].pin_caps; /* return old value */ set->gp_flags = sc->sc_pins[pin].pin_flags; if (flags > 0) { gpiobus_pin_ctl(gc, pin, flags); /* update current value */ sc->sc_pins[pin].pin_flags = flags; } /* rename pin or new pin? 
*/ if (set->gp_name2[0] != '\0') { found = 0; LIST_FOREACH(nm, &sc->sc_names, gp_next) if (nm->gp_pin == pin) { strlcpy(nm->gp_name, set->gp_name2, sizeof(nm->gp_name)); found = 1; break; } if (!found) { nm = malloc(sizeof(*nm), M_DEVBUF, M_WAITOK); strlcpy(nm->gp_name, set->gp_name2, sizeof(nm->gp_name)); nm->gp_pin = set->gp_pin; LIST_INSERT_HEAD(&sc->sc_names, nm, gp_next); } } break; case GPIOPINUNSET: if (securelevel > 0) return (EPERM); set = (struct gpio_pin_set *)data; if (set->gp_name[0] != '\0') { pin = gpio_pinbyname(sc, set->gp_name); if (pin == -1) return (EINVAL); } else pin = set->gp_pin; if (pin < 0 || pin >= sc->sc_npins) return (EINVAL); if (sc->sc_pins[pin].pin_mapped) return (EBUSY); if (!(sc->sc_pins[pin].pin_flags & GPIO_PIN_SET)) return (EINVAL); LIST_FOREACH(nm, &sc->sc_names, gp_next) { if (nm->gp_pin == pin) { LIST_REMOVE(nm, gp_next); free(nm, M_DEVBUF, sizeof(*nm)); break; } } sc->sc_pins[pin].pin_flags &= ~GPIO_PIN_SET; break; default: return (ENOTTY); } return (0); }
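/*
 * Illustrative sketch (not part of gpio.c): a minimal userland client of
 * the ioctl interface handled by gpioioctl() above.  The pin number and
 * the /dev/gpio0 device path are hypothetical; note that at securelevel > 0
 * only pins previously configured with GPIOPINSET are accessible.
 */
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/gpio.h>

#include <err.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct gpio_pin_op op;
	int fd;

	if ((fd = open("/dev/gpio0", O_RDWR)) == -1)
		err(1, "open");

	memset(&op, 0, sizeof(op));
	op.gp_pin = 4;				/* hypothetical pin number */

	if (ioctl(fd, GPIOPINREAD, &op) == -1)	/* gp_value <- current state */
		err(1, "GPIOPINREAD");
	printf("pin %d is %s\n", op.gp_pin,
	    op.gp_value == GPIO_PIN_HIGH ? "high" : "low");

	if (ioctl(fd, GPIOPINTOGGLE, &op) == -1) /* gp_value <- old state */
		err(1, "GPIOPINTOGGLE");

	close(fd);
	return (0);
}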
/*	$OpenBSD: vioscsi.c,v 1.26 2020/09/22 19:32:53 krw Exp $	*/

/*
 * Copyright (c) 2013 Google Inc.
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/device.h>
#include <sys/mutex.h>

#include <machine/bus.h>
#include <machine/intr.h>

#include <dev/pv/vioscsireg.h>
#include <dev/pv/virtiovar.h>

#include <scsi/scsi_all.h>
#include <scsi/scsiconf.h>

enum { vioscsi_debug = 0 };
#define DPRINTF(f...) do { if (vioscsi_debug) printf(f); } while (0)

/* Number of DMA segments for buffers that the device must support */
#define SEG_MAX (MAXPHYS/PAGE_SIZE + 1)
/* In the virtqueue, we need space for header and footer, too */
#define ALLOC_SEGS (SEG_MAX + 2)

struct vioscsi_req {
	struct virtio_scsi_req_hdr	 vr_req;
	struct virtio_scsi_res_hdr	 vr_res;
	struct scsi_xfer		*vr_xs;
	bus_dmamap_t			 vr_control;
	bus_dmamap_t			 vr_data;
	SLIST_ENTRY(vioscsi_req)	 vr_list;
	int				 vr_qe_index;
};

struct vioscsi_softc {
	struct device		 sc_dev;
	struct scsi_iopool	 sc_iopool;
	struct mutex		 sc_vr_mtx;

	struct virtqueue	 sc_vqs[3];
	struct vioscsi_req	*sc_reqs;
	bus_dma_segment_t	 sc_reqs_segs[1];
	SLIST_HEAD(, vioscsi_req) sc_freelist;
};

int		 vioscsi_match(struct device *, void *, void *);
void		 vioscsi_attach(struct device *, struct device *, void *);

int		 vioscsi_alloc_reqs(struct vioscsi_softc *,
		    struct virtio_softc *, int);
void		 vioscsi_scsi_cmd(struct scsi_xfer *);
int		 vioscsi_vq_done(struct virtqueue *);
void		 vioscsi_req_done(struct vioscsi_softc *, struct virtio_softc *,
		    struct vioscsi_req *);
void		*vioscsi_req_get(void *);
void		 vioscsi_req_put(void *, void *);

struct cfattach vioscsi_ca = {
	sizeof(struct vioscsi_softc),
	vioscsi_match,
	vioscsi_attach,
};

struct cfdriver vioscsi_cd = {
	NULL, "vioscsi", DV_DULL,
};

struct scsi_adapter vioscsi_switch = {
	vioscsi_scsi_cmd, NULL, NULL, NULL, NULL
};

const char *const vioscsi_vq_names[] = {
	"control",
	"event",
	"request",
};

int
vioscsi_match(struct device *parent, void *self, void *aux)
{
	struct virtio_softc *va = (struct virtio_softc *)aux;

	if (va->sc_childdevid == PCI_PRODUCT_VIRTIO_SCSI)
		return (1);
	return (0);
}

void
vioscsi_attach(struct device *parent, struct device *self, void *aux)
{
	struct virtio_softc *vsc = (struct virtio_softc *)parent;
	struct vioscsi_softc *sc = (struct vioscsi_softc *)self;
	struct scsibus_attach_args saa;
	int i, rv;

	if (vsc->sc_child != NULL) {
		printf(": parent already has a child\n");
		return;
	}
	vsc->sc_child = &sc->sc_dev;
	vsc->sc_ipl = IPL_BIO;

	// TODO(matthew): Negotiate hotplug.
vsc->sc_vqs = sc->sc_vqs; vsc->sc_nvqs = nitems(sc->sc_vqs); virtio_negotiate_features(vsc, NULL); uint32_t cmd_per_lun = virtio_read_device_config_4(vsc, VIRTIO_SCSI_CONFIG_CMD_PER_LUN); uint32_t seg_max = virtio_read_device_config_4(vsc, VIRTIO_SCSI_CONFIG_SEG_MAX); uint16_t max_target = virtio_read_device_config_2(vsc, VIRTIO_SCSI_CONFIG_MAX_TARGET); if (seg_max < SEG_MAX) { printf("\nMax number of segments %d too small\n", seg_max); goto err; } for (i = 0; i < nitems(sc->sc_vqs); i++) { rv = virtio_alloc_vq(vsc, &sc->sc_vqs[i], i, MAXPHYS, ALLOC_SEGS, vioscsi_vq_names[i]); if (rv) { printf(": failed to allocate virtqueue %d\n", i); goto err; } sc->sc_vqs[i].vq_done = vioscsi_vq_done; } int qsize = sc->sc_vqs[2].vq_num; printf(": qsize %d\n", qsize); SLIST_INIT(&sc->sc_freelist); mtx_init(&sc->sc_vr_mtx, IPL_BIO); scsi_iopool_init(&sc->sc_iopool, sc, vioscsi_req_get, vioscsi_req_put); int nreqs = vioscsi_alloc_reqs(sc, vsc, qsize); if (nreqs == 0) { printf("\nCan't alloc reqs\n"); goto err; } saa.saa_adapter = &vioscsi_switch; saa.saa_adapter_softc = sc; saa.saa_adapter_target = SDEV_NO_ADAPTER_TARGET; saa.saa_adapter_buswidth = max_target; saa.saa_luns = 8; saa.saa_openings = (nreqs > cmd_per_lun) ? cmd_per_lun : nreqs; saa.saa_pool = &sc->sc_iopool; saa.saa_quirks = saa.saa_flags = 0; saa.saa_wwpn = saa.saa_wwnn = 0; config_found(self, &saa, scsiprint); return; err: vsc->sc_child = VIRTIO_CHILD_ERROR; return; } void vioscsi_scsi_cmd(struct scsi_xfer *xs) { struct vioscsi_softc *sc = xs->sc_link->bus->sb_adapter_softc; struct virtio_softc *vsc = (struct virtio_softc *)sc->sc_dev.dv_parent; struct vioscsi_req *vr = xs->io; struct virtio_scsi_req_hdr *req = &vr->vr_req; struct virtqueue *vq = &sc->sc_vqs[2]; int slot = vr->vr_qe_index; DPRINTF("vioscsi_scsi_cmd: enter\n"); // TODO(matthew): Support bidirectional SCSI commands? if ((xs->flags & (SCSI_DATA_IN | SCSI_DATA_OUT)) == (SCSI_DATA_IN | SCSI_DATA_OUT)) { goto stuffup; } vr->vr_xs = xs; /* * "The only supported format for the LUN field is: first byte set to * 1, second byte set to target, third and fourth byte representing a * single level LUN structure, followed by four zero bytes." */ if (xs->sc_link->target >= 256 || xs->sc_link->lun >= 16384) goto stuffup; req->lun[0] = 1; req->lun[1] = xs->sc_link->target; req->lun[2] = 0x40 | (xs->sc_link->lun >> 8); req->lun[3] = xs->sc_link->lun; memset(req->lun + 4, 0, 4); if ((size_t)xs->cmdlen > sizeof(req->cdb)) goto stuffup; memset(req->cdb, 0, sizeof(req->cdb)); memcpy(req->cdb, &xs->cmd, xs->cmdlen); int isread = !!(xs->flags & SCSI_DATA_IN); int nsegs = 2; if (xs->flags & (SCSI_DATA_IN | SCSI_DATA_OUT)) { if (bus_dmamap_load(vsc->sc_dmat, vr->vr_data, xs->data, xs->datalen, NULL, ((isread ? BUS_DMA_READ : BUS_DMA_WRITE) | BUS_DMA_NOWAIT))) goto stuffup; nsegs += vr->vr_data->dm_nsegs; } /* * Adjust reservation to the number needed, or virtio gets upset. Note * that it may trim UP if 'xs' is being recycled w/o getting a new * reservation! */ int s = splbio(); virtio_enqueue_trim(vq, slot, nsegs); splx(s); bus_dmamap_sync(vsc->sc_dmat, vr->vr_control, offsetof(struct vioscsi_req, vr_req), sizeof(struct virtio_scsi_req_hdr), BUS_DMASYNC_PREWRITE); bus_dmamap_sync(vsc->sc_dmat, vr->vr_control, offsetof(struct vioscsi_req, vr_res), sizeof(struct virtio_scsi_res_hdr), BUS_DMASYNC_PREREAD); if (xs->flags & (SCSI_DATA_IN | SCSI_DATA_OUT)) bus_dmamap_sync(vsc->sc_dmat, vr->vr_data, 0, xs->datalen, isread ? 
BUS_DMASYNC_PREREAD : BUS_DMASYNC_PREWRITE); s = splbio(); virtio_enqueue_p(vq, slot, vr->vr_control, offsetof(struct vioscsi_req, vr_req), sizeof(struct virtio_scsi_req_hdr), 1); if (xs->flags & SCSI_DATA_OUT) virtio_enqueue(vq, slot, vr->vr_data, 1); virtio_enqueue_p(vq, slot, vr->vr_control, offsetof(struct vioscsi_req, vr_res), sizeof(struct virtio_scsi_res_hdr), 0); if (xs->flags & SCSI_DATA_IN) virtio_enqueue(vq, slot, vr->vr_data, 0); virtio_enqueue_commit(vsc, vq, slot, 1); if (ISSET(xs->flags, SCSI_POLL)) { DPRINTF("vioscsi_scsi_cmd: polling...\n"); int timeout = 1000; do { virtio_poll_intr(vsc); if (vr->vr_xs != xs) break; delay(1000); } while (--timeout > 0); if (vr->vr_xs == xs) { // TODO(matthew): Abort the request. xs->error = XS_TIMEOUT; xs->resid = xs->datalen; DPRINTF("vioscsi_scsi_cmd: polling timeout\n"); scsi_done(xs); } DPRINTF("vioscsi_scsi_cmd: done (timeout=%d)\n", timeout); } splx(s); return; stuffup: xs->error = XS_DRIVER_STUFFUP; xs->resid = xs->datalen; DPRINTF("vioscsi_scsi_cmd: stuffup\n"); scsi_done(xs); } void vioscsi_req_done(struct vioscsi_softc *sc, struct virtio_softc *vsc, struct vioscsi_req *vr) { struct scsi_xfer *xs = vr->vr_xs; DPRINTF("vioscsi_req_done: enter vr: %p xs: %p\n", vr, xs); int isread = !!(xs->flags & SCSI_DATA_IN); bus_dmamap_sync(vsc->sc_dmat, vr->vr_control, offsetof(struct vioscsi_req, vr_req), sizeof(struct virtio_scsi_req_hdr), BUS_DMASYNC_POSTWRITE); bus_dmamap_sync(vsc->sc_dmat, vr->vr_control, offsetof(struct vioscsi_req, vr_res), sizeof(struct virtio_scsi_res_hdr), BUS_DMASYNC_POSTREAD); if (xs->flags & (SCSI_DATA_IN | SCSI_DATA_OUT)) { bus_dmamap_sync(vsc->sc_dmat, vr->vr_data, 0, xs->datalen, isread ? BUS_DMASYNC_POSTREAD : BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(vsc->sc_dmat, vr->vr_data); } if (vr->vr_res.response != VIRTIO_SCSI_S_OK) { xs->error = XS_DRIVER_STUFFUP; xs->resid = xs->datalen; DPRINTF("vioscsi_req_done: stuffup: %d\n", vr->vr_res.response); goto done; } size_t sense_len = MIN(sizeof(xs->sense), vr->vr_res.sense_len); memcpy(&xs->sense, vr->vr_res.sense, sense_len); xs->error = (sense_len == 0) ? XS_NOERROR : XS_SENSE; xs->status = vr->vr_res.status; xs->resid = vr->vr_res.residual; DPRINTF("vioscsi_req_done: done %d, %d, %zd\n", xs->error, xs->status, xs->resid); done: vr->vr_xs = NULL; scsi_done(xs); } int vioscsi_vq_done(struct virtqueue *vq) { struct virtio_softc *vsc = vq->vq_owner; struct vioscsi_softc *sc = (struct vioscsi_softc *)vsc->sc_child; struct vq_entry *qe; struct vioscsi_req *vr; int ret = 0; DPRINTF("vioscsi_vq_done: enter\n"); for (;;) { int r, s, slot; s = splbio(); r = virtio_dequeue(vsc, vq, &slot, NULL); splx(s); if (r != 0) break; DPRINTF("vioscsi_vq_done: slot=%d\n", slot); qe = &vq->vq_entries[slot]; vr = &sc->sc_reqs[qe->qe_vr_index]; vioscsi_req_done(sc, vsc, vr); ret = 1; } DPRINTF("vioscsi_vq_done: exit %d\n", ret); return (ret); } /* * vioscso_req_get() provides the SCSI layer with all the * resources necessary to start an I/O on the device. * * Since the size of the I/O is unknown at this time the * resouces allocated (a.k.a. reserved) must be sufficient * to allow the maximum possible I/O size. * * When the I/O is actually attempted via vioscsi_scsi_cmd() * excess resources will be returned via virtio_enqueue_trim(). 
*/ void * vioscsi_req_get(void *cookie) { struct vioscsi_softc *sc = cookie; struct vioscsi_req *vr = NULL; mtx_enter(&sc->sc_vr_mtx); vr = SLIST_FIRST(&sc->sc_freelist); if (vr != NULL) SLIST_REMOVE_HEAD(&sc->sc_freelist, vr_list); mtx_leave(&sc->sc_vr_mtx); DPRINTF("vioscsi_req_get: %p\n", vr); return (vr); } void vioscsi_req_put(void *cookie, void *io) { struct vioscsi_softc *sc = cookie; struct vioscsi_req *vr = io; DPRINTF("vioscsi_req_put: %p\n", vr); mtx_enter(&sc->sc_vr_mtx); /* * Do *NOT* call virtio_dequeue_commit()! * * Descriptors are permanently associated with the vioscsi_req and * should not be placed on the free list! */ SLIST_INSERT_HEAD(&sc->sc_freelist, vr, vr_list); mtx_leave(&sc->sc_vr_mtx); } int vioscsi_alloc_reqs(struct vioscsi_softc *sc, struct virtio_softc *vsc, int qsize) { struct virtqueue *vq = &sc->sc_vqs[2]; struct vioscsi_req *vr; struct vring_desc *vd; size_t allocsize; int i, r, nreqs, rsegs, slot; void *vaddr; if (vq->vq_indirect != NULL) nreqs = qsize; else nreqs = qsize / ALLOC_SEGS; allocsize = nreqs * sizeof(struct vioscsi_req); r = bus_dmamem_alloc(vsc->sc_dmat, allocsize, 0, 0, &sc->sc_reqs_segs[0], 1, &rsegs, BUS_DMA_NOWAIT); if (r != 0) { printf("bus_dmamem_alloc, size %zd, error %d\n", allocsize, r); return 0; } r = bus_dmamem_map(vsc->sc_dmat, &sc->sc_reqs_segs[0], 1, allocsize, (caddr_t *)&vaddr, BUS_DMA_NOWAIT); if (r != 0) { printf("bus_dmamem_map failed, error %d\n", r); bus_dmamem_free(vsc->sc_dmat, &sc->sc_reqs_segs[0], 1); return 0; } sc->sc_reqs = vaddr; memset(vaddr, 0, allocsize); for (i = 0; i < nreqs; i++) { /* * Assign descriptors and create the DMA maps for each * allocated request. */ vr = &sc->sc_reqs[i]; r = virtio_enqueue_prep(vq, &slot); if (r == 0) r = virtio_enqueue_reserve(vq, slot, ALLOC_SEGS); if (r != 0) return i; if (vq->vq_indirect == NULL) { /* * The reserved slots must be a contiguous block * starting at vq_desc[slot]. */ vd = &vq->vq_desc[slot]; for (r = 0; r < ALLOC_SEGS - 1; r++) { DPRINTF("vd[%d].next = %d should be %d\n", r, vd[r].next, (slot + r + 1)); if (vd[r].next != (slot + r + 1)) return i; } if (r == (ALLOC_SEGS -1) && vd[r].next != 0) return i; DPRINTF("Reserved slots are contiguous as required!\n"); } vr->vr_qe_index = slot; vr->vr_req.id = slot; vr->vr_req.task_attr = VIRTIO_SCSI_S_SIMPLE; vq->vq_entries[slot].qe_vr_index = i; r = bus_dmamap_create(vsc->sc_dmat, offsetof(struct vioscsi_req, vr_xs), 1, offsetof(struct vioscsi_req, vr_xs), 0, BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW, &vr->vr_control); if (r != 0) { printf("bus_dmamap_create vr_control failed, error %d\n", r); return i; } r = bus_dmamap_create(vsc->sc_dmat, MAXPHYS, SEG_MAX, MAXPHYS, 0, BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW, &vr->vr_data); if (r != 0) { printf("bus_dmamap_create vr_data failed, error %d\n", r ); return i; } r = bus_dmamap_load(vsc->sc_dmat, vr->vr_control, vr, offsetof(struct vioscsi_req, vr_xs), NULL, BUS_DMA_NOWAIT); if (r != 0) { printf("bus_dmamap_load vr_control failed, error %d\n", r ); return i; } SLIST_INSERT_HEAD(&sc->sc_freelist, vr, vr_list); } return nreqs; }
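/*
 * Editor's example (not driver code): the request-count arithmetic that
 * vioscsi_alloc_reqs() applies, shown as a standalone program.  The MAXPHYS
 * and PAGE_SIZE values are typical amd64 numbers chosen for illustration,
 * and the queue size is a made-up example, not a value read from hardware.
 */
#include <stdio.h>

#define MAXPHYS		(64 * 1024)			/* assumed maximum transfer size */
#define PAGE_SIZE	4096				/* assumed page size */
#define SEG_MAX		(MAXPHYS / PAGE_SIZE + 1)	/* data segments per request */
#define ALLOC_SEGS	(SEG_MAX + 2)			/* + request header and response footer */

int
main(void)
{
	int qsize = 128;	/* example virtqueue size reported by the device */

	/* With indirect descriptors each request occupies a single ring slot. */
	printf("indirect descriptors: nreqs = %d\n", qsize);

	/* Without them, every request needs ALLOC_SEGS contiguous descriptors. */
	printf("direct descriptors:   nreqs = %d (ALLOC_SEGS = %d)\n",
	    qsize / ALLOC_SEGS, ALLOC_SEGS);
	return 0;
}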
184 148 201 /* $OpenBSD: bus_space.c,v 1.26 2015/04/25 21:31:24 guenther Exp $ */ /* $NetBSD: bus_space.c,v 1.2 2003/03/14 18:47:53 christos Exp $ */ /*- * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Charles M. Hannum and by Jason R. Thorpe of the Numerical Aerospace * Simulation Facility, NASA Ames Research Center. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include <sys/param.h> #include <sys/systm.h> #include <sys/malloc.h> #include <sys/extent.h> #include <uvm/uvm_extern.h> #include <machine/bus.h> #include <dev/isa/isareg.h> #include <machine/isa_machdep.h> /* * Extent maps to manage I/O and memory space. Allocate * storage for 16 regions in each, initially. Later, ioport_malloc_safe * will indicate that it's safe to use malloc() to dynamically allocate * region descriptors. * * N.B. At least two regions are _always_ allocated from the iomem * extent map; (0 -> ISA hole) and (end of ISA hole -> end of RAM). * * The extent maps are not static! Machine-dependent ISA and EISA * routines need access to them for bus address space allocation. 
*/ static long ioport_ex_storage[EXTENT_FIXED_STORAGE_SIZE(16) / sizeof(long)]; static long iomem_ex_storage[EXTENT_FIXED_STORAGE_SIZE(16) / sizeof(long)]; struct extent *ioport_ex; struct extent *iomem_ex; static int ioport_malloc_safe; int x86_mem_add_mapping(bus_addr_t, bus_size_t, int, bus_space_handle_t *); u_int8_t x86_bus_space_io_read_1(bus_space_handle_t, bus_size_t); u_int16_t x86_bus_space_io_read_2(bus_space_handle_t, bus_size_t); u_int32_t x86_bus_space_io_read_4(bus_space_handle_t, bus_size_t); u_int64_t x86_bus_space_io_read_8(bus_space_handle_t, bus_size_t); void x86_bus_space_io_read_multi_1(bus_space_handle_t, bus_size_t, u_int8_t *, bus_size_t); void x86_bus_space_io_read_multi_2(bus_space_handle_t, bus_size_t, u_int16_t *, bus_size_t); void x86_bus_space_io_read_multi_4(bus_space_handle_t, bus_size_t, u_int32_t *, bus_size_t); void x86_bus_space_io_read_multi_8(bus_space_handle_t, bus_size_t, u_int64_t *, bus_size_t); void x86_bus_space_io_read_region_1(bus_space_handle_t, bus_size_t, u_int8_t *, bus_size_t); void x86_bus_space_io_read_region_2(bus_space_handle_t, bus_size_t, u_int16_t *, bus_size_t); void x86_bus_space_io_read_region_4(bus_space_handle_t, bus_size_t, u_int32_t *, bus_size_t); void x86_bus_space_io_read_region_8(bus_space_handle_t, bus_size_t, u_int64_t *, bus_size_t); void x86_bus_space_io_write_1(bus_space_handle_t, bus_size_t, u_int8_t); void x86_bus_space_io_write_2(bus_space_handle_t, bus_size_t, u_int16_t); void x86_bus_space_io_write_4(bus_space_handle_t, bus_size_t, u_int32_t); void x86_bus_space_io_write_8(bus_space_handle_t, bus_size_t, u_int64_t); void x86_bus_space_io_write_multi_1(bus_space_handle_t, bus_size_t, const u_int8_t *, bus_size_t); void x86_bus_space_io_write_multi_2(bus_space_handle_t, bus_size_t, const u_int16_t *, bus_size_t); void x86_bus_space_io_write_multi_4(bus_space_handle_t, bus_size_t, const u_int32_t *, bus_size_t); void x86_bus_space_io_write_multi_8(bus_space_handle_t, bus_size_t, const u_int64_t *, bus_size_t); void x86_bus_space_io_write_region_1(bus_space_handle_t, bus_size_t, const u_int8_t *, bus_size_t); void x86_bus_space_io_write_region_2(bus_space_handle_t, bus_size_t, const u_int16_t *, bus_size_t); void x86_bus_space_io_write_region_4(bus_space_handle_t, bus_size_t, const u_int32_t *, bus_size_t); void x86_bus_space_io_write_region_8(bus_space_handle_t, bus_size_t, const u_int64_t *, bus_size_t); void x86_bus_space_io_set_multi_1(bus_space_handle_t, bus_size_t, u_int8_t, size_t); void x86_bus_space_io_set_multi_2(bus_space_handle_t, bus_size_t, u_int16_t, size_t); void x86_bus_space_io_set_multi_4(bus_space_handle_t, bus_size_t, u_int32_t, size_t); void x86_bus_space_io_set_multi_8(bus_space_handle_t, bus_size_t, u_int64_t, size_t); void x86_bus_space_io_set_region_1(bus_space_handle_t, bus_size_t, u_int8_t, size_t); void x86_bus_space_io_set_region_2(bus_space_handle_t, bus_size_t, u_int16_t, size_t); void x86_bus_space_io_set_region_4(bus_space_handle_t, bus_size_t, u_int32_t, size_t); void x86_bus_space_io_set_region_8(bus_space_handle_t, bus_size_t, u_int64_t, size_t); void x86_bus_space_io_copy_1(bus_space_handle_t, bus_size_t, bus_space_handle_t, bus_size_t, size_t); void x86_bus_space_io_copy_2(bus_space_handle_t, bus_size_t, bus_space_handle_t, bus_size_t, size_t); void x86_bus_space_io_copy_4(bus_space_handle_t, bus_size_t, bus_space_handle_t, bus_size_t, size_t); void x86_bus_space_io_copy_8(bus_space_handle_t, bus_size_t, bus_space_handle_t, bus_size_t, size_t); void * 
x86_bus_space_io_vaddr(bus_space_handle_t); paddr_t x86_bus_space_io_mmap(bus_addr_t, off_t, int, int); const struct x86_bus_space_ops x86_bus_space_io_ops = { x86_bus_space_io_read_1, x86_bus_space_io_read_2, x86_bus_space_io_read_4, x86_bus_space_io_read_8, x86_bus_space_io_read_multi_1, x86_bus_space_io_read_multi_2, x86_bus_space_io_read_multi_4, x86_bus_space_io_read_multi_8, x86_bus_space_io_read_region_1, x86_bus_space_io_read_region_2, x86_bus_space_io_read_region_4, x86_bus_space_io_read_region_8, x86_bus_space_io_write_1, x86_bus_space_io_write_2, x86_bus_space_io_write_4, x86_bus_space_io_write_8, x86_bus_space_io_write_multi_1, x86_bus_space_io_write_multi_2, x86_bus_space_io_write_multi_4, x86_bus_space_io_write_multi_8, x86_bus_space_io_write_region_1, x86_bus_space_io_write_region_2, x86_bus_space_io_write_region_4, x86_bus_space_io_write_region_8, x86_bus_space_io_set_multi_1, x86_bus_space_io_set_multi_2, x86_bus_space_io_set_multi_4, x86_bus_space_io_set_multi_8, x86_bus_space_io_set_region_1, x86_bus_space_io_set_region_2, x86_bus_space_io_set_region_4, x86_bus_space_io_set_region_8, x86_bus_space_io_copy_1, x86_bus_space_io_copy_2, x86_bus_space_io_copy_4, x86_bus_space_io_copy_8, x86_bus_space_io_vaddr, x86_bus_space_io_mmap }; u_int8_t x86_bus_space_mem_read_1(bus_space_handle_t, bus_size_t); u_int16_t x86_bus_space_mem_read_2(bus_space_handle_t, bus_size_t); u_int32_t x86_bus_space_mem_read_4(bus_space_handle_t, bus_size_t); u_int64_t x86_bus_space_mem_read_8(bus_space_handle_t, bus_size_t); void x86_bus_space_mem_read_multi_1(bus_space_handle_t, bus_size_t, u_int8_t *, bus_size_t); void x86_bus_space_mem_read_multi_2(bus_space_handle_t, bus_size_t, u_int16_t *, bus_size_t); void x86_bus_space_mem_read_multi_4(bus_space_handle_t, bus_size_t, u_int32_t *, bus_size_t); void x86_bus_space_mem_read_multi_8(bus_space_handle_t, bus_size_t, u_int64_t *, bus_size_t); void x86_bus_space_mem_read_region_1(bus_space_handle_t, bus_size_t, u_int8_t *, bus_size_t); void x86_bus_space_mem_read_region_2(bus_space_handle_t, bus_size_t, u_int16_t *, bus_size_t); void x86_bus_space_mem_read_region_4(bus_space_handle_t, bus_size_t, u_int32_t *, bus_size_t); void x86_bus_space_mem_read_region_8(bus_space_handle_t, bus_size_t, u_int64_t *, bus_size_t); void x86_bus_space_mem_write_1(bus_space_handle_t, bus_size_t, u_int8_t); void x86_bus_space_mem_write_2(bus_space_handle_t, bus_size_t, u_int16_t); void x86_bus_space_mem_write_4(bus_space_handle_t, bus_size_t, u_int32_t); void x86_bus_space_mem_write_8(bus_space_handle_t, bus_size_t, u_int64_t); void x86_bus_space_mem_write_multi_1(bus_space_handle_t, bus_size_t, const u_int8_t *, bus_size_t); void x86_bus_space_mem_write_multi_2(bus_space_handle_t, bus_size_t, const u_int16_t *, bus_size_t); void x86_bus_space_mem_write_multi_4(bus_space_handle_t, bus_size_t, const u_int32_t *, bus_size_t); void x86_bus_space_mem_write_multi_8(bus_space_handle_t, bus_size_t, const u_int64_t *, bus_size_t); void x86_bus_space_mem_write_region_1(bus_space_handle_t, bus_size_t, const u_int8_t *, bus_size_t); void x86_bus_space_mem_write_region_2(bus_space_handle_t, bus_size_t, const u_int16_t *, bus_size_t); void x86_bus_space_mem_write_region_4(bus_space_handle_t, bus_size_t, const u_int32_t *, bus_size_t); void x86_bus_space_mem_write_region_8(bus_space_handle_t, bus_size_t, const u_int64_t *, bus_size_t); void x86_bus_space_mem_set_multi_1(bus_space_handle_t, bus_size_t, u_int8_t, size_t); void x86_bus_space_mem_set_multi_2(bus_space_handle_t, 
bus_size_t, u_int16_t, size_t); void x86_bus_space_mem_set_multi_4(bus_space_handle_t, bus_size_t, u_int32_t, size_t); void x86_bus_space_mem_set_multi_8(bus_space_handle_t, bus_size_t, u_int64_t, size_t); void x86_bus_space_mem_set_region_1(bus_space_handle_t, bus_size_t, u_int8_t, size_t); void x86_bus_space_mem_set_region_2(bus_space_handle_t, bus_size_t, u_int16_t, size_t); void x86_bus_space_mem_set_region_4(bus_space_handle_t, bus_size_t, u_int32_t, size_t); void x86_bus_space_mem_set_region_8(bus_space_handle_t, bus_size_t, u_int64_t, size_t); void x86_bus_space_mem_copy_1(bus_space_handle_t, bus_size_t, bus_space_handle_t, bus_size_t, size_t); void x86_bus_space_mem_copy_2(bus_space_handle_t, bus_size_t, bus_space_handle_t, bus_size_t, size_t); void x86_bus_space_mem_copy_4(bus_space_handle_t, bus_size_t, bus_space_handle_t, bus_size_t, size_t); void x86_bus_space_mem_copy_8(bus_space_handle_t, bus_size_t, bus_space_handle_t, bus_size_t, size_t); void * x86_bus_space_mem_vaddr(bus_space_handle_t); paddr_t x86_bus_space_mem_mmap(bus_addr_t, off_t, int, int); const struct x86_bus_space_ops x86_bus_space_mem_ops = { x86_bus_space_mem_read_1, x86_bus_space_mem_read_2, x86_bus_space_mem_read_4, x86_bus_space_mem_read_8, x86_bus_space_mem_read_multi_1, x86_bus_space_mem_read_multi_2, x86_bus_space_mem_read_multi_4, x86_bus_space_mem_read_multi_8, x86_bus_space_mem_read_region_1, x86_bus_space_mem_read_region_2, x86_bus_space_mem_read_region_4, x86_bus_space_mem_read_region_8, x86_bus_space_mem_write_1, x86_bus_space_mem_write_2, x86_bus_space_mem_write_4, x86_bus_space_mem_write_8, x86_bus_space_mem_write_multi_1, x86_bus_space_mem_write_multi_2, x86_bus_space_mem_write_multi_4, x86_bus_space_mem_write_multi_8, x86_bus_space_mem_write_region_1, x86_bus_space_mem_write_region_2, x86_bus_space_mem_write_region_4, x86_bus_space_mem_write_region_8, x86_bus_space_mem_set_multi_1, x86_bus_space_mem_set_multi_2, x86_bus_space_mem_set_multi_4, x86_bus_space_mem_set_multi_8, x86_bus_space_mem_set_region_1, x86_bus_space_mem_set_region_2, x86_bus_space_mem_set_region_4, x86_bus_space_mem_set_region_8, x86_bus_space_mem_copy_1, x86_bus_space_mem_copy_2, x86_bus_space_mem_copy_4, x86_bus_space_mem_copy_8, x86_bus_space_mem_vaddr, x86_bus_space_mem_mmap }; void x86_bus_space_init(void) { /* * Initialize the I/O port and I/O mem extent maps. * Note: we don't have to check the return value since * creation of a fixed extent map will never fail (since * descriptor storage has already been allocated). * * N.B. The iomem extent manages _all_ physical addresses * on the machine. When the amount of RAM is found, the two * extents of RAM are allocated from the map (0 -> ISA hole * and end of ISA hole -> end of RAM). */ ioport_ex = extent_create("ioport", 0x0, 0xffff, M_DEVBUF, (caddr_t)ioport_ex_storage, sizeof(ioport_ex_storage), EX_NOCOALESCE|EX_NOWAIT); iomem_ex = extent_create("iomem", 0x0, 0xffffffffffff, M_DEVBUF, (caddr_t)iomem_ex_storage, sizeof(iomem_ex_storage), EX_NOCOALESCE|EX_NOWAIT); } void x86_bus_space_mallocok(void) { ioport_malloc_safe = 1; } int bus_space_map(bus_space_tag_t t, bus_addr_t bpa, bus_size_t size, int flags, bus_space_handle_t *bshp) { int error; struct extent *ex; /* * Pick the appropriate extent map. */ if (t == X86_BUS_SPACE_IO) { ex = ioport_ex; if (flags & BUS_SPACE_MAP_LINEAR) return (EINVAL); } else if (t == X86_BUS_SPACE_MEM) ex = iomem_ex; else panic("bus_space_map: bad bus space tag"); /* * Before we go any further, let's make sure that this * region is available. 
*/ error = extent_alloc_region(ex, bpa, size, EX_NOWAIT | (ioport_malloc_safe ? EX_MALLOCOK : 0)); if (error) return (error); /* * For I/O space, that's all she wrote. */ if (t == X86_BUS_SPACE_IO) { *bshp = bpa; return (0); } if (bpa >= IOM_BEGIN && (bpa + size) <= IOM_END) { *bshp = (bus_space_handle_t)ISA_HOLE_VADDR(bpa); return(0); } /* * For memory space, map the bus physical address to * a kernel virtual address. */ error = x86_mem_add_mapping(bpa, size, flags, bshp); if (error) { if (extent_free(ex, bpa, size, EX_NOWAIT | (ioport_malloc_safe ? EX_MALLOCOK : 0))) { printf("bus_space_map: pa 0x%lx, size 0x%lx\n", bpa, size); printf("bus_space_map: can't free region\n"); } } return (error); } int _bus_space_map(bus_space_tag_t t, bus_addr_t bpa, bus_size_t size, int flags, bus_space_handle_t *bshp) { /* * For I/O space, just fill in the handle. */ if (t == X86_BUS_SPACE_IO) { *bshp = bpa; return (0); } /* * For memory space, map the bus physical address to * a kernel virtual address. */ return (x86_mem_add_mapping(bpa, size, flags, bshp)); } int bus_space_alloc(bus_space_tag_t t, bus_addr_t rstart, bus_addr_t rend, bus_size_t size, bus_size_t alignment, bus_size_t boundary, int flags, bus_addr_t *bpap, bus_space_handle_t *bshp) { struct extent *ex; u_long bpa; int error; /* * Pick the appropriate extent map. */ if (t == X86_BUS_SPACE_IO) { ex = ioport_ex; } else if (t == X86_BUS_SPACE_MEM) ex = iomem_ex; else panic("bus_space_alloc: bad bus space tag"); /* * Sanity check the allocation against the extent's boundaries. */ if (rstart < ex->ex_start || rend > ex->ex_end) panic("bus_space_alloc: bad region start/end"); /* * Do the requested allocation. */ error = extent_alloc_subregion(ex, rstart, rend, size, alignment, 0, boundary, EX_FAST | EX_NOWAIT | (ioport_malloc_safe ? EX_MALLOCOK : 0), &bpa); if (error) return (error); /* * For I/O space, that's all she wrote. */ if (t == X86_BUS_SPACE_IO) { *bshp = *bpap = bpa; return (0); } /* * For memory space, map the bus physical address to * a kernel virtual address. */ error = x86_mem_add_mapping(bpa, size, flags, bshp); if (error) { if (extent_free(iomem_ex, bpa, size, EX_NOWAIT | (ioport_malloc_safe ? EX_MALLOCOK : 0))) { printf("bus_space_alloc: pa 0x%lx, size 0x%lx\n", bpa, size); printf("bus_space_alloc: can't free region\n"); } } *bpap = bpa; return (error); } int x86_mem_add_mapping(bus_addr_t bpa, bus_size_t size, int flags, bus_space_handle_t *bshp) { paddr_t pa, endpa; vaddr_t va; bus_size_t map_size; int pmap_flags = PMAP_NOCACHE; pa = trunc_page(bpa); endpa = round_page(bpa + size); #ifdef DIAGNOSTIC if (endpa <= pa && endpa != 0) panic("bus_mem_add_mapping: overflow"); #endif map_size = endpa - pa; va = (vaddr_t)km_alloc(map_size, &kv_any, &kp_none, &kd_nowait); if (va == 0) return (ENOMEM); *bshp = (bus_space_handle_t)(va + (bpa & PGOFSET)); if (flags & BUS_SPACE_MAP_CACHEABLE) pmap_flags = 0; else if (flags & BUS_SPACE_MAP_PREFETCHABLE) pmap_flags = PMAP_WC; for (; map_size > 0; pa += PAGE_SIZE, va += PAGE_SIZE, map_size -= PAGE_SIZE) pmap_kenter_pa(va, pa | pmap_flags, PROT_READ | PROT_WRITE); pmap_update(pmap_kernel()); return 0; } /* * void _bus_space_unmap(bus_space_tag bst, bus_space_handle bsh, * bus_size_t size, bus_addr_t *adrp) * * This function unmaps memory- or io-space mapped by the function * _bus_space_map(). 
This function works nearly as same as * bus_space_unmap(), but this function does not ask kernel * built-in extents and returns physical address of the bus space, * for the convenience of the extra extent manager. */ void _bus_space_unmap(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t size, bus_addr_t *adrp) { u_long va, endva; bus_addr_t bpa; /* * Find the correct bus physical address. */ if (t == X86_BUS_SPACE_IO) { bpa = bsh; } else if (t == X86_BUS_SPACE_MEM) { bpa = (bus_addr_t)ISA_PHYSADDR(bsh); if (IOM_BEGIN <= bpa && bpa <= IOM_END) goto ok; va = trunc_page(bsh); endva = round_page(bsh + size); #ifdef DIAGNOSTIC if (endva <= va) panic("_bus_space_unmap: overflow"); #endif (void) pmap_extract(pmap_kernel(), va, &bpa); bpa += (bsh & PGOFSET); pmap_kremove(va, endva - va); pmap_update(pmap_kernel()); /* * Free the kernel virtual mapping. */ km_free((void *)va, endva - va, &kv_any, &kp_none); } else panic("bus_space_unmap: bad bus space tag"); ok: if (adrp != NULL) *adrp = bpa; } void bus_space_unmap(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t size) { struct extent *ex; u_long va, endva; bus_addr_t bpa; /* * Find the correct extent and bus physical address. */ if (t == X86_BUS_SPACE_IO) { ex = ioport_ex; bpa = bsh; } else if (t == X86_BUS_SPACE_MEM) { ex = iomem_ex; bpa = (bus_addr_t)ISA_PHYSADDR(bsh); if (IOM_BEGIN <= bpa && bpa <= IOM_END) goto ok; va = trunc_page(bsh); endva = round_page(bsh + size); #ifdef DIAGNOSTIC if (endva <= va) panic("bus_space_unmap: overflow"); #endif (void)pmap_extract(pmap_kernel(), va, &bpa); bpa += (bsh & PGOFSET); pmap_kremove(va, endva - va); pmap_update(pmap_kernel()); /* * Free the kernel virtual mapping. */ km_free((void *)va, endva - va, &kv_any, &kp_none); } else panic("bus_space_unmap: bad bus space tag"); ok: if (extent_free(ex, bpa, size, EX_NOWAIT | (ioport_malloc_safe ? EX_MALLOCOK : 0))) { printf("bus_space_unmap: %s 0x%lx, size 0x%lx\n", (t == X86_BUS_SPACE_IO) ? "port" : "pa", bpa, size); printf("bus_space_unmap: can't free region\n"); } } void bus_space_free(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t size) { /* bus_space_unmap() does all that we need to do. 
*/ bus_space_unmap(t, bsh, size); } int bus_space_subregion(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t offset, bus_size_t size, bus_space_handle_t *nbshp) { *nbshp = bsh + offset; return (0); } u_int8_t x86_bus_space_io_read_1(bus_space_handle_t h, bus_size_t o) { return (inb(h + o)); } u_int16_t x86_bus_space_io_read_2(bus_space_handle_t h, bus_size_t o) { return (inw(h + o)); } u_int32_t x86_bus_space_io_read_4(bus_space_handle_t h, bus_size_t o) { return (inl(h + o)); } u_int64_t x86_bus_space_io_read_8(bus_space_handle_t h, bus_size_t o) { panic("bus_space_read_8: invalid bus space tag"); } void x86_bus_space_io_read_multi_1(bus_space_handle_t h, bus_size_t o, u_int8_t *ptr, bus_size_t cnt) { insb(h + o, ptr, cnt); } void x86_bus_space_io_read_multi_2(bus_space_handle_t h, bus_size_t o, u_int16_t *ptr, bus_size_t cnt) { insw(h + o, ptr, cnt); } void x86_bus_space_io_read_multi_4(bus_space_handle_t h, bus_size_t o, u_int32_t *ptr, bus_size_t cnt) { insl(h + o, ptr, cnt); } void x86_bus_space_io_read_multi_8(bus_space_handle_t h, bus_size_t o, u_int64_t *ptr, bus_size_t cnt) { panic("bus_space_multi_8: invalid bus space tag"); } void x86_bus_space_io_read_region_1(bus_space_handle_t h, bus_size_t o, u_int8_t *ptr, bus_size_t cnt) { int dummy1; void *dummy2; int dummy3; int __x; u_int32_t port = h + o; __asm volatile( "1: inb %w1,%%al ;" " stosb ;" " incl %1 ;" " loop 1b" : "=&a" (__x), "=d" (dummy1), "=D" (dummy2), "=c" (dummy3) : "1" (port), "2" (ptr), "3" (cnt) : "memory"); } void x86_bus_space_io_read_region_2(bus_space_handle_t h, bus_size_t o, u_int16_t *ptr, bus_size_t cnt) { int dummy1; void *dummy2; int dummy3; int __x; u_int32_t port = h + o; __asm volatile( "1: inw %w1,%%ax ;" " stosw ;" " addl $2,%1 ;" " loop 1b" : "=&a" (__x), "=d" (dummy1), "=D" (dummy2), "=c" (dummy3) : "1" ((port)), "2" ((ptr)), "3" ((cnt)) : "memory"); } void x86_bus_space_io_read_region_4(bus_space_handle_t h, bus_size_t o, u_int32_t *ptr, bus_size_t cnt) { int dummy1; void *dummy2; int dummy3; int __x; u_int32_t port = h + o; __asm volatile( "1: inl %w1,%%eax ;" " stosl ;" " addl $4,%1 ;" " loop 1b" : "=&a" (__x), "=d" (dummy1), "=D" (dummy2), "=c" (dummy3) : "1" (port), "2" (ptr), "3" (cnt) : "memory"); } void x86_bus_space_io_read_region_8(bus_space_handle_t h, bus_size_t o, u_int64_t *ptr, bus_size_t cnt) { panic("bus_space_read_region_8: invalid bus space tag"); } void x86_bus_space_io_write_1(bus_space_handle_t h, bus_size_t o, u_int8_t v) { outb(h + o, v); } void x86_bus_space_io_write_2(bus_space_handle_t h, bus_size_t o, u_int16_t v) { outw(h + o, v); } void x86_bus_space_io_write_4(bus_space_handle_t h, bus_size_t o, u_int32_t v) { outl(h + o, v); } void x86_bus_space_io_write_8(bus_space_handle_t h, bus_size_t o, u_int64_t v) { panic("bus_space_write_8: invalid bus space tag"); } void x86_bus_space_io_write_multi_1(bus_space_handle_t h, bus_size_t o, const u_int8_t *ptr, bus_size_t cnt) { outsb(h + o, ptr, cnt); } void x86_bus_space_io_write_multi_2(bus_space_handle_t h, bus_size_t o, const u_int16_t *ptr, bus_size_t cnt) { outsw(h + o, ptr, cnt); } void x86_bus_space_io_write_multi_4(bus_space_handle_t h, bus_size_t o, const u_int32_t *ptr, bus_size_t cnt) { outsl(h + o, ptr, cnt); } void x86_bus_space_io_write_multi_8(bus_space_handle_t h, bus_size_t o, const u_int64_t *ptr, bus_size_t cnt) { panic("bus_space_write_multi_8: invalid bus space tag"); } void x86_bus_space_io_write_region_1(bus_space_handle_t h, bus_size_t o, const u_int8_t *ptr, bus_size_t cnt) { int dummy1; void 
*dummy2; int dummy3; int __x; u_int32_t port = h + o; __asm volatile( "1: lodsb ;" " outb %%al,%w1 ;" " incl %1 ;" " loop 1b" : "=&a" (__x), "=d" (dummy1), "=S" (dummy2), "=c" (dummy3) : "1" (port), "2" (ptr), "3" (cnt) : "memory"); } void x86_bus_space_io_write_region_2(bus_space_handle_t h, bus_size_t o, const u_int16_t *ptr, bus_size_t cnt) { int dummy1; void *dummy2; int dummy3; int __x; u_int32_t port = h + o; __asm volatile( "1: lodsw ;" " outw %%ax,%w1 ;" " addl $2,%1 ;" " loop 1b" : "=&a" (__x), "=d" (dummy1), "=S" (dummy2), "=c" (dummy3) : "1" (port), "2" (ptr), "3" (cnt) : "memory"); } void x86_bus_space_io_write_region_4(bus_space_handle_t h, bus_size_t o, const u_int32_t *ptr, bus_size_t cnt) { int dummy1; void *dummy2; int dummy3; int __x; u_int32_t port = h + o; __asm volatile( "1: lodsl ;" " outl %%eax,%w1 ;" " addl $4,%1 ;" " loop 1b" : "=&a" (__x), "=d" (dummy1), "=S" (dummy2), "=c" (dummy3) : "1" (port), "2" (ptr), "3" (cnt) : "memory"); } void x86_bus_space_io_write_region_8(bus_space_handle_t h, bus_size_t o, const u_int64_t *ptr, bus_size_t cnt) { panic("bus_space_write_region_8: invalid bus space tag"); } void x86_bus_space_io_set_multi_1(bus_space_handle_t h, bus_size_t o, u_int8_t v, size_t c) { bus_addr_t addr = h + o; while (c--) outb(addr, v); } void x86_bus_space_io_set_multi_2(bus_space_handle_t h, bus_size_t o, u_int16_t v, size_t c) { bus_addr_t addr = h + o; while (c--) outw(addr, v); } void x86_bus_space_io_set_multi_4(bus_space_handle_t h, bus_size_t o, u_int32_t v, size_t c) { bus_addr_t addr = h + o; while (c--) outl(addr, v); } void x86_bus_space_io_set_multi_8(bus_space_handle_t h, bus_size_t o, u_int64_t v, size_t c) { panic("bus_space_set_multi_8: invalid bus space tag"); } void x86_bus_space_io_set_region_1(bus_space_handle_t h, bus_size_t o, u_int8_t v, size_t c) { bus_addr_t addr = h + o; for (; c != 0; c--, addr++) outb(addr, v); } void x86_bus_space_io_set_region_2(bus_space_handle_t h, bus_size_t o, u_int16_t v, size_t c) { bus_addr_t addr = h + o; for (; c != 0; c--, addr += sizeof(v)) outw(addr, v); } void x86_bus_space_io_set_region_4(bus_space_handle_t h, bus_size_t o, u_int32_t v, size_t c) { bus_addr_t addr = h + o; for (; c != 0; c--, addr += sizeof(v)) outl(addr, v); } void x86_bus_space_io_set_region_8(bus_space_handle_t h, bus_size_t o, u_int64_t v, size_t c) { panic("bus_space_set_region_8: invalid bus space tag"); } void x86_bus_space_io_copy_1(bus_space_handle_t h1, bus_size_t o1, bus_space_handle_t h2, bus_size_t o2, size_t c) { bus_addr_t addr1 = h1 + o1; bus_addr_t addr2 = h2 + o2; if (addr1 >= addr2) { /* src after dest: copy forward */ for (; c != 0; c--, addr1++, addr2++) outb(addr2, inb(addr1)); } else { /* dest after src: copy backwards */ for (addr1 += (c - 1), addr2 += (c - 1); c != 0; c--, addr1--, addr2--) outb(addr2, inb(addr1)); } } void x86_bus_space_io_copy_2(bus_space_handle_t h1, bus_size_t o1, bus_space_handle_t h2, bus_size_t o2, size_t c) { bus_addr_t addr1 = h1 + o1; bus_addr_t addr2 = h2 + o2; if (addr1 >= addr2) { /* src after dest: copy forward */ for (; c != 0; c--, addr1 += 2, addr2 += 2) outw(addr2, inw(addr1)); } else { /* dest after src: copy backwards */ for (addr1 += 2 * (c - 1), addr2 += 2 * (c - 1); c != 0; c--, addr1 -= 2, addr2 -= 2) outw(addr2, inw(addr1)); } } void x86_bus_space_io_copy_4(bus_space_handle_t h1, bus_size_t o1, bus_space_handle_t h2, bus_size_t o2, size_t c) { bus_addr_t addr1 = h1 + o1; bus_addr_t addr2 = h2 + o2; if (addr1 >= addr2) { /* src after dest: copy forward */ for (; c 
!= 0; c--, addr1 += 4, addr2 += 4) outl(addr2, inl(addr1)); } else { /* dest after src: copy backwards */ for (addr1 += 4 * (c - 1), addr2 += 4 * (c - 1); c != 0; c--, addr1 -= 4, addr2 -= 4) outl(addr2, inl(addr1)); } } void x86_bus_space_io_copy_8(bus_space_handle_t h1, bus_size_t o1, bus_space_handle_t h2, bus_size_t o2, size_t c) { panic("bus_space_set_region_8: invalid bus space tag"); } void * x86_bus_space_io_vaddr(bus_space_handle_t h) { return (NULL); } paddr_t x86_bus_space_io_mmap(bus_addr_t addr, off_t off, int prot, int flags) { /* Can't mmap I/O space. */ return (-1); } void x86_bus_space_mem_write_1(bus_space_handle_t h, bus_size_t o, u_int8_t v) { *(volatile u_int8_t *)(h + o) = v; } void x86_bus_space_mem_write_2(bus_space_handle_t h, bus_size_t o, u_int16_t v) { *(volatile u_int16_t *)(h + o) = v; } u_int8_t x86_bus_space_mem_read_1(bus_space_handle_t h, bus_size_t o) { return (*(volatile u_int8_t *)(h + o)); } u_int16_t x86_bus_space_mem_read_2(bus_space_handle_t h, bus_size_t o) { return (*(volatile u_int16_t *)(h + o)); } u_int32_t x86_bus_space_mem_read_4(bus_space_handle_t h, bus_size_t o) { return (*(volatile u_int32_t *)(h + o)); } u_int64_t x86_bus_space_mem_read_8(bus_space_handle_t h, bus_size_t o) { return (*(volatile u_int64_t *)(h + o)); } void x86_bus_space_mem_read_multi_1(bus_space_handle_t h, bus_size_t o, u_int8_t *ptr, bus_size_t cnt) { void *dummy1; int dummy2; void *dummy3; int __x; __asm volatile( "1: movb (%2),%%al ;" " stosb ;" " loop 1b" : "=D" (dummy1), "=c" (dummy2), "=r" (dummy3), "=&a" (__x) : "0" ((ptr)), "1" ((cnt)), "2" (h + o) : "memory"); } void x86_bus_space_mem_read_multi_2(bus_space_handle_t h, bus_size_t o, u_int16_t *ptr, bus_size_t cnt) { void *dummy1; int dummy2; void *dummy3; int __x; __asm volatile( "1: movw (%2),%%ax ;" " stosw ;" " loop 1b" : "=D" (dummy1), "=c" (dummy2), "=r" (dummy3), "=&a" (__x) : "0" ((ptr)), "1" ((cnt)), "2" (h + o) : "memory"); } void x86_bus_space_mem_read_multi_4(bus_space_handle_t h, bus_size_t o, u_int32_t *ptr, bus_size_t cnt) { void *dummy1; int dummy2; void *dummy3; int __x; __asm volatile( "1: movl (%2),%%eax ;" " stosl ;" " loop 1b" : "=D" (dummy1), "=c" (dummy2), "=r" (dummy3), "=&a" (__x) : "0" ((ptr)), "1" ((cnt)), "2" (h + o) : "memory"); } void x86_bus_space_mem_read_multi_8(bus_space_handle_t h, bus_size_t o, u_int64_t *ptr, bus_size_t cnt) { void *dummy1; int dummy2; void *dummy3; int __x; __asm volatile( "1: movq (%2),%%rax ;" " stosq ;" " loop 1b" : "=D" (dummy1), "=c" (dummy2), "=r" (dummy3), "=&a" (__x) : "0" ((ptr)), "1" ((cnt)), "2" (h + o) : "memory"); } void x86_bus_space_mem_read_region_1(bus_space_handle_t h, bus_size_t o, u_int8_t *ptr, bus_size_t cnt) { int dummy1; void *dummy2; int dummy3; __asm volatile( " repne ;" " movsb" : "=S" (dummy1), "=D" (dummy2), "=c" (dummy3) : "0" (h + o), "1" (ptr), "2" (cnt) : "memory"); } void x86_bus_space_mem_read_region_2(bus_space_handle_t h, bus_size_t o, u_int16_t *ptr, bus_size_t cnt) { int dummy1; void *dummy2; int dummy3; __asm volatile( " repne ;" " movsw" : "=S" (dummy1), "=D" (dummy2), "=c" (dummy3) : "0" (h + o), "1" (ptr), "2" (cnt) : "memory"); } void x86_bus_space_mem_read_region_4(bus_space_handle_t h, bus_size_t o, u_int32_t *ptr, bus_size_t cnt) { int dummy1; void *dummy2; int dummy3; __asm volatile( " repne ;" " movsl" : "=S" (dummy1), "=D" (dummy2), "=c" (dummy3) : "0" (h + o), "1" (ptr), "2" (cnt) : "memory"); } void x86_bus_space_mem_read_region_8(bus_space_handle_t h, bus_size_t o, u_int64_t *ptr, bus_size_t cnt) { int 
dummy1; void *dummy2; int dummy3; __asm volatile( " repne ;" " movsq" : "=S" (dummy1), "=D" (dummy2), "=c" (dummy3) : "0" (h + o), "1" (ptr), "2" (cnt) : "memory"); } void x86_bus_space_mem_write_4(bus_space_handle_t h, bus_size_t o, u_int32_t v) { *(volatile u_int32_t *)(h + o) = v; } void x86_bus_space_mem_write_8(bus_space_handle_t h, bus_size_t o, u_int64_t v) { *(volatile u_int64_t *)(h + o) = v; } void x86_bus_space_mem_write_multi_1(bus_space_handle_t h, bus_size_t o, const u_int8_t *ptr, bus_size_t cnt) { void *dummy1; int dummy2; void *dummy3; int __x; __asm volatile( "1: lodsb ;" " movb %%al,(%2) ;" " loop 1b" : "=S" (dummy1), "=c" (dummy2), "=r" (dummy3), "=&a" (__x) : "0" (ptr), "1" (cnt), "2" (h + o)); } void x86_bus_space_mem_write_multi_2(bus_space_handle_t h, bus_size_t o, const u_int16_t *ptr, bus_size_t cnt) { void *dummy1; int dummy2; void *dummy3; int __x; __asm volatile( "1: lodsw ;" " movw %%ax,(%2) ;" " loop 1b" : "=S" (dummy1), "=c" (dummy2), "=r" (dummy3), "=&a" (__x) : "0" (ptr), "1" (cnt), "2" (h + o)); } void x86_bus_space_mem_write_multi_4(bus_space_handle_t h, bus_size_t o, const u_int32_t *ptr, bus_size_t cnt) { void *dummy1; int dummy2; void *dummy3; int __x; __asm volatile( "1: lodsl ;" " movl %%eax,(%2) ;" " loop 1b" : "=S" (dummy1), "=c" (dummy2), "=r" (dummy3), "=&a" (__x) : "0" (ptr), "1" (cnt), "2" (h + o)); } void x86_bus_space_mem_write_multi_8(bus_space_handle_t h, bus_size_t o, const u_int64_t *ptr, bus_size_t cnt) { void *dummy1; int dummy2; void *dummy3; int __x; __asm volatile( "1: lodsq ;" " movq %%rax,(%2) ;" " loop 1b" : "=S" (dummy1), "=c" (dummy2), "=r" (dummy3), "=&a" (__x) : "0" (ptr), "1" (cnt), "2" (h + o)); } void x86_bus_space_mem_write_region_1(bus_space_handle_t h, bus_size_t o, const u_int8_t *ptr, bus_size_t cnt) { int dummy1; void *dummy2; int dummy3; __asm volatile( " repne ;" " movsb" : "=D" (dummy1), "=S" (dummy2), "=c" (dummy3) : "0" (h + o), "1" (ptr), "2" (cnt) : "memory"); } void x86_bus_space_mem_write_region_2(bus_space_handle_t h, bus_size_t o, const u_int16_t *ptr, bus_size_t cnt) { int dummy1; void *dummy2; int dummy3; __asm volatile( " repne ;" " movsw" : "=D" (dummy1), "=S" (dummy2), "=c" (dummy3) : "0" (h + o), "1" (ptr), "2" (cnt) : "memory"); } void x86_bus_space_mem_write_region_4(bus_space_handle_t h, bus_size_t o, const u_int32_t *ptr, bus_size_t cnt) { int dummy1; void *dummy2; int dummy3; __asm volatile( " repne ;" " movsl" : "=D" (dummy1), "=S" (dummy2), "=c" (dummy3) : "0" (h + o), "1" (ptr), "2" (cnt) : "memory"); } void x86_bus_space_mem_write_region_8(bus_space_handle_t h, bus_size_t o, const u_int64_t *ptr, bus_size_t cnt) { int dummy1; void *dummy2; int dummy3; __asm volatile( " repne ;" " movsq" : "=D" (dummy1), "=S" (dummy2), "=c" (dummy3) : "0" (h + o), "1" (ptr), "2" (cnt) : "memory"); } void x86_bus_space_mem_set_multi_1(bus_space_handle_t h, bus_size_t o, u_int8_t v, size_t c) { bus_addr_t addr = h + o; while (c--) *(volatile u_int8_t *)(addr) = v; } void x86_bus_space_mem_set_multi_2(bus_space_handle_t h, bus_size_t o, u_int16_t v, size_t c) { bus_addr_t addr = h + o; while (c--) *(volatile u_int16_t *)(addr) = v; } void x86_bus_space_mem_set_multi_4(bus_space_handle_t h, bus_size_t o, u_int32_t v, size_t c) { bus_addr_t addr = h + o; while (c--) *(volatile u_int32_t *)(addr) = v; } void x86_bus_space_mem_set_multi_8(bus_space_handle_t h, bus_size_t o, u_int64_t v, size_t c) { bus_addr_t addr = h + o; while (c--) *(volatile u_int64_t *)(addr) = v; } void 
x86_bus_space_mem_set_region_1(bus_space_handle_t h, bus_size_t o, u_int8_t v, size_t c) { bus_addr_t addr = h + o; for (; c != 0; c--, addr++) *(volatile u_int8_t *)(addr) = v; } void x86_bus_space_mem_set_region_2(bus_space_handle_t h, bus_size_t o, u_int16_t v, size_t c) { bus_addr_t addr = h + o; for (; c != 0; c--, addr += sizeof(v)) *(volatile u_int16_t *)(addr) = v; } void x86_bus_space_mem_set_region_4(bus_space_handle_t h, bus_size_t o, u_int32_t v, size_t c) { bus_addr_t addr = h + o; for (; c != 0; c--, addr += sizeof(v)) *(volatile u_int32_t *)(addr) = v; } void x86_bus_space_mem_set_region_8(bus_space_handle_t h, bus_size_t o, u_int64_t v, size_t c) { bus_addr_t addr = h + o; for (; c != 0; c--, addr += sizeof(v)) *(volatile u_int64_t *)(addr) = v; } void x86_bus_space_mem_copy_1( bus_space_handle_t h1, bus_size_t o1, bus_space_handle_t h2, bus_size_t o2, size_t c) { bus_addr_t addr1 = h1 + o1; bus_addr_t addr2 = h2 + o2; if (addr1 >= addr2) { /* src after dest: copy forward */ for (; c != 0; c--, addr1++, addr2++) *(volatile u_int8_t *)(addr2) = *(volatile u_int8_t *)(addr1); } else { /* dest after src: copy backwards */ for (addr1 += (c - 1), addr2 += (c - 1); c != 0; c--, addr1--, addr2--) *(volatile u_int8_t *)(addr2) = *(volatile u_int8_t *)(addr1); } } void x86_bus_space_mem_copy_2(bus_space_handle_t h1, bus_size_t o1, bus_space_handle_t h2, bus_size_t o2, size_t c) { bus_addr_t addr1 = h1 + o1; bus_addr_t addr2 = h2 + o2; if (addr1 >= addr2) { /* src after dest: copy forward */ for (; c != 0; c--, addr1 += 2, addr2 += 2) *(volatile u_int16_t *)(addr2) = *(volatile u_int16_t *)(addr1); } else { /* dest after src: copy backwards */ for (addr1 += 2 * (c - 1), addr2 += 2 * (c - 1); c != 0; c--, addr1 -= 2, addr2 -= 2) *(volatile u_int16_t *)(addr2) = *(volatile u_int16_t *)(addr1); } } void x86_bus_space_mem_copy_4(bus_space_handle_t h1, bus_size_t o1, bus_space_handle_t h2, bus_size_t o2, size_t c) { bus_addr_t addr1 = h1 + o1; bus_addr_t addr2 = h2 + o2; if (addr1 >= addr2) { /* src after dest: copy forward */ for (; c != 0; c--, addr1 += 4, addr2 += 4) *(volatile u_int32_t *)(addr2) = *(volatile u_int32_t *)(addr1); } else { /* dest after src: copy backwards */ for (addr1 += 4 * (c - 1), addr2 += 4 * (c - 1); c != 0; c--, addr1 -= 4, addr2 -= 4) *(volatile u_int32_t *)(addr2) = *(volatile u_int32_t *)(addr1); } } void x86_bus_space_mem_copy_8(bus_space_handle_t h1, bus_size_t o1, bus_space_handle_t h2, bus_size_t o2, size_t c) { bus_addr_t addr1 = h1 + o1; bus_addr_t addr2 = h2 + o2; if (addr1 >= addr2) { /* src after dest: copy forward */ for (; c != 0; c--, addr1 += 8, addr2 += 8) *(volatile u_int64_t *)(addr2) = *(volatile u_int64_t *)(addr1); } else { /* dest after src: copy backwards */ for (addr1 += 8 * (c - 1), addr2 += 8 * (c - 1); c != 0; c--, addr1 -= 8, addr2 -= 8) *(volatile u_int64_t *)(addr2) = *(volatile u_int64_t *)(addr1); } } void * x86_bus_space_mem_vaddr(bus_space_handle_t h) { return ((void *)h); } paddr_t x86_bus_space_mem_mmap(bus_addr_t addr, off_t off, int prot, int flags) { /* * "addr" is the base address of the device we're mapping. * "off" is the offset into that device. * * Note we are called for each "page" in the device that * the upper layers want to map. */ return (addr + off); }
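/*
 * Editor's example (not kernel code): the overlap-safe copy direction
 * choice used by the x86_bus_space_{io,mem}_copy_N routines above, applied
 * to an ordinary byte buffer so it can be run standalone.  Copying forward
 * when the source lies at or above the destination, and backward otherwise,
 * keeps overlapping ranges from clobbering themselves mid-copy.
 */
#include <stdio.h>
#include <stddef.h>

static void
overlap_copy_1(unsigned char *dst, const unsigned char *src, size_t c)
{
	if (src >= dst) {
		/* src after dst: copy forward */
		for (; c != 0; c--)
			*dst++ = *src++;
	} else {
		/* dst after src: copy backwards */
		dst += c - 1;
		src += c - 1;
		for (; c != 0; c--)
			*dst-- = *src--;
	}
}

int
main(void)
{
	char buf[] = "abcdef";

	/* Copy four bytes two positions to the right within the same buffer. */
	overlap_copy_1((unsigned char *)buf + 2, (unsigned char *)buf, 4);
	printf("%s\n", buf);	/* prints "ababcd" */
	return 0;
}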
2 /* $OpenBSD: ntfs_vfsops.c,v 1.64 2020/02/27 09:10:31 mpi Exp $ */ /* $NetBSD: ntfs_vfsops.c,v 1.7 2003/04/24 07:50:19 christos Exp $ */ /*- * Copyright (c) 1998, 1999 Semen Ustimenko * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Id: ntfs_vfsops.c,v 1.7 1999/05/31 11:28:30 phk Exp */ #include <sys/param.h> #include <sys/systm.h> #include <sys/namei.h> #include <sys/proc.h> #include <sys/kernel.h> #include <sys/vnode.h> #include <sys/lock.h> #include <sys/mount.h> #include <sys/buf.h> #include <sys/disk.h> #include <sys/fcntl.h> #include <sys/malloc.h> #include <sys/device.h> #include <sys/conf.h> #include <sys/specdev.h> /*#define NTFS_DEBUG 1*/ #include <ntfs/ntfs.h> #include <ntfs/ntfs_inode.h> #include <ntfs/ntfs_subr.h> #include <ntfs/ntfs_vfsops.h> #include <ntfs/ntfs_ihash.h> int ntfs_mount(struct mount *, const char *, void *, struct nameidata *, struct proc *); int ntfs_quotactl(struct mount *, int, uid_t, caddr_t, struct proc *); int ntfs_root(struct mount *, struct vnode **); int ntfs_start(struct mount *, int, struct proc *); int ntfs_statfs(struct mount *, struct statfs *, struct proc *); int ntfs_sync(struct mount *, int, int, struct ucred *, struct proc *); int ntfs_unmount(struct mount *, int, struct proc *); int ntfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp); int ntfs_mountfs(struct vnode *, struct mount *, struct ntfs_args *, struct proc *); int ntfs_vptofh(struct vnode *, struct fid *); int ntfs_init(struct vfsconf *); int ntfs_fhtovp(struct mount *, struct fid *, struct vnode **); int ntfs_checkexp(struct mount *, struct mbuf *, int *, struct ucred **); int ntfs_sysctl(int *, u_int, void *, size_t *, void *, size_t, struct proc *); /* * Verify a remote client has export rights and return these rights via. * exflagsp and credanonp. */ int ntfs_checkexp(struct mount *mp, struct mbuf *nam, int *exflagsp, struct ucred **credanonp) { struct netcred *np; struct ntfsmount *ntm = VFSTONTFS(mp); /* * Get the export permission structure for this <mp, client> tuple. 
*/ np = vfs_export_lookup(mp, &ntm->ntm_export, nam); if (np == NULL) return (EACCES); *exflagsp = np->netc_exflags; *credanonp = &np->netc_anon; return (0); } int ntfs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, size_t newlen, struct proc *p) { return (EINVAL); } int ntfs_init(struct vfsconf *vcp) { return 0; } int ntfs_mount(struct mount *mp, const char *path, void *data, struct nameidata *ndp, struct proc *p) { int err = 0; struct vnode *devvp; struct ntfs_args *args = data; char fname[MNAMELEN]; char fspec[MNAMELEN]; ntfs_nthashinit(); /* *** * Mounting non-root file system or updating a file system *** */ /* * If updating, check whether changing from read-only to * read/write; if there is no device name, that's all we do. */ if (mp->mnt_flag & MNT_UPDATE) { /* if not updating name...*/ if (args && args->fspec == NULL) { /* * Process export requests. Jumping to "success" * will return the vfs_export() error code. */ struct ntfsmount *ntm = VFSTONTFS(mp); err = vfs_export(mp, &ntm->ntm_export, &args->export_info); goto success; } printf("ntfs_mount(): MNT_UPDATE not supported\n"); err = EINVAL; goto error_1; } /* * Not an update, or updating the name: look up the name * and verify that it refers to a sensible block device. */ err = copyinstr(args->fspec, fspec, sizeof(fspec), NULL); if (err) goto error_1; if (disk_map(fspec, fname, sizeof(fname), DM_OPENBLCK) == -1) bcopy(fspec, fname, sizeof(fname)); NDINIT(ndp, LOOKUP, FOLLOW, UIO_SYSSPACE, fname, p); err = namei(ndp); if (err) { /* can't get devvp!*/ goto error_1; } devvp = ndp->ni_vp; if (devvp->v_type != VBLK) { err = ENOTBLK; goto error_2; } if (major(devvp->v_rdev) >= nblkdev) { err = ENXIO; goto error_2; } if (mp->mnt_flag & MNT_UPDATE) { #if 0 /* ******************** * UPDATE ******************** */ if (devvp != ntmp->um_devvp) err = EINVAL; /* needs translation */ else vrele(devvp); /* * Update device name only on success */ if( !err) { err = set_statfs_info(NULL, UIO_USERSPACE, args->fspec, UIO_USERSPACE, mp, p); } #endif } else { /* ******************** * NEW MOUNT ******************** */ /* * Since this is a new mount, we want the names for * the device and the mount point copied in. If an * error occurs, the mountpoint is discarded by the * upper level code. */ /* Save "last mounted on" info for mount point (NULL pad)*/ bzero(mp->mnt_stat.f_mntonname, MNAMELEN); strlcpy(mp->mnt_stat.f_mntonname, path, MNAMELEN); bzero(mp->mnt_stat.f_mntfromname, MNAMELEN); strlcpy(mp->mnt_stat.f_mntfromname, fname, MNAMELEN); bzero(mp->mnt_stat.f_mntfromspec, MNAMELEN); strlcpy(mp->mnt_stat.f_mntfromspec, fspec, MNAMELEN); bcopy(args, &mp->mnt_stat.mount_info.ntfs_args, sizeof(*args)); if ( !err) { err = ntfs_mountfs(devvp, mp, args, p); } } if (err) { goto error_2; } /* * Initialize FS stat information in mount struct; uses both * mp->mnt_stat.f_mntonname and mp->mnt_stat.f_mntfromname * * This code is common to root and non-root mounts */ (void)VFS_STATFS(mp, &mp->mnt_stat, p); goto success; error_2: /* error with devvp held*/ /* release devvp before failing*/ vrele(devvp); error_1: /* no state to back out*/ success: return(err); } /* * Common code for mount and mountroot */ int ntfs_mountfs(struct vnode *devvp, struct mount *mp, struct ntfs_args *argsp, struct proc *p) { struct buf *bp; struct ntfsmount *ntmp = NULL; dev_t dev = devvp->v_rdev; int error, ncount, i; struct vnode *vp; /* * Disallow multiple mounts of the same device. 
* Disallow mounting of a device that is currently in use * (except for root, which might share swap device for miniroot). * Flush out any old buffers remaining from a previous use. */ error = vfs_mountedon(devvp); if (error) return (error); ncount = vcount(devvp); if (ncount > 1 && devvp != rootvp) return (EBUSY); vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, INFSLP); VOP_UNLOCK(devvp); if (error) return (error); error = VOP_OPEN(devvp, FREAD, FSCRED, p); if (error) return (error); bp = NULL; error = bread(devvp, BBLOCK, BBSIZE, &bp); if (error) goto out; ntmp = malloc(sizeof *ntmp, M_NTFSMNT, M_WAITOK | M_ZERO); bcopy(bp->b_data, &ntmp->ntm_bootfile, sizeof(struct bootfile)); brelse(bp); bp = NULL; if (strncmp(ntmp->ntm_bootfile.bf_sysid, NTFS_BBID, NTFS_BBIDLEN)) { error = EINVAL; DPRINTF("ntfs_mountfs: invalid boot block\n"); goto out; } { int8_t cpr = ntmp->ntm_mftrecsz; if( cpr > 0 ) ntmp->ntm_bpmftrec = ntmp->ntm_spc * cpr; else ntmp->ntm_bpmftrec = (1 << (-cpr)) / ntmp->ntm_bps; } DPRINTF("ntfs_mountfs(): bps: %u, spc: %u, media: %x, " "mftrecsz: %u (%u sects)\n", ntmp->ntm_bps, ntmp->ntm_spc, ntmp->ntm_bootfile.bf_media, ntmp->ntm_mftrecsz, ntmp->ntm_bpmftrec); DPRINTF("ntfs_mountfs(): mftcn: 0x%llx|0x%llx\n", ntmp->ntm_mftcn, ntmp->ntm_mftmirrcn); ntmp->ntm_mountp = mp; ntmp->ntm_dev = dev; ntmp->ntm_devvp = devvp; ntmp->ntm_uid = argsp->uid; ntmp->ntm_gid = argsp->gid; ntmp->ntm_mode = argsp->mode; ntmp->ntm_flag = argsp->flag; mp->mnt_data = ntmp; TAILQ_INIT(&ntmp->ntm_ntnodeq); /* set file name encode/decode hooks XXX utf-8 only for now */ ntmp->ntm_wget = ntfs_utf8_wget; ntmp->ntm_wput = ntfs_utf8_wput; ntmp->ntm_wcmp = ntfs_utf8_wcmp; DPRINTF("ntfs_mountfs(): case-%s,%s uid: %d, gid: %d, mode: %o\n", (ntmp->ntm_flag & NTFS_MFLAG_CASEINS) ? "insens." : "sens.", (ntmp->ntm_flag & NTFS_MFLAG_ALLNAMES) ? " allnames," : "", ntmp->ntm_uid, ntmp->ntm_gid, ntmp->ntm_mode); /* * We read in some system nodes to do not allow * reclaim them and to have everytime access to them. */ { int pi[3] = { NTFS_MFTINO, NTFS_ROOTINO, NTFS_BITMAPINO }; for (i=0; i<3; i++) { error = VFS_VGET(mp, pi[i], &(ntmp->ntm_sysvn[pi[i]])); if(error) goto out1; ntmp->ntm_sysvn[pi[i]]->v_flag |= VSYSTEM; vref(ntmp->ntm_sysvn[pi[i]]); vput(ntmp->ntm_sysvn[pi[i]]); } } /* read the Unicode lowercase --> uppercase translation table, * if necessary */ if ((error = ntfs_toupper_use(mp, ntmp, p))) goto out1; /* * Scan $BitMap and count free clusters */ error = ntfs_calccfree(ntmp, &ntmp->ntm_cfree); if(error) goto out1; /* * Read and translate to internal format attribute * definition file. 
*/ { int num,j; struct attrdef ad; /* Open $AttrDef */ error = VFS_VGET(mp, NTFS_ATTRDEFINO, &vp ); if(error) goto out1; /* Count valid entries */ for(num = 0; ; num++) { error = ntfs_readattr(ntmp, VTONT(vp), NTFS_A_DATA, NULL, num * sizeof(ad), sizeof(ad), &ad, NULL); if (error) goto out1; if (ad.ad_name[0] == 0) break; } /* Alloc memory for attribute definitions */ ntmp->ntm_ad = mallocarray(num, sizeof(struct ntvattrdef), M_NTFSMNT, M_WAITOK); ntmp->ntm_adnum = num; /* Read them and translate */ for(i = 0; i < num; i++){ error = ntfs_readattr(ntmp, VTONT(vp), NTFS_A_DATA, NULL, i * sizeof(ad), sizeof(ad), &ad, NULL); if (error) goto out1; j = 0; do { ntmp->ntm_ad[i].ad_name[j] = ad.ad_name[j]; } while(ad.ad_name[j++]); ntmp->ntm_ad[i].ad_namelen = j - 1; ntmp->ntm_ad[i].ad_type = ad.ad_type; } vput(vp); } mp->mnt_stat.f_fsid.val[0] = dev; mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; mp->mnt_stat.f_namemax = NTFS_MAXFILENAME; mp->mnt_flag |= MNT_LOCAL; devvp->v_specmountpoint = mp; return (0); out1: for (i = 0; i < NTFS_SYSNODESNUM; i++) if (ntmp->ntm_sysvn[i]) vrele(ntmp->ntm_sysvn[i]); if (vflush(mp,NULLVP,0)) DPRINTF("ntfs_mountfs: vflush failed\n"); out: if (devvp->v_specinfo) devvp->v_specmountpoint = NULL; if (bp) brelse(bp); if (ntmp != NULL) { if (ntmp->ntm_ad != NULL) free(ntmp->ntm_ad, M_NTFSMNT, 0); free(ntmp, M_NTFSMNT, 0); mp->mnt_data = NULL; } /* lock the device vnode before calling VOP_CLOSE() */ vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); (void)VOP_CLOSE(devvp, FREAD, NOCRED, p); VOP_UNLOCK(devvp); return (error); } int ntfs_start(struct mount *mp, int flags, struct proc *p) { return (0); } int ntfs_unmount(struct mount *mp, int mntflags, struct proc *p) { struct ntfsmount *ntmp; int error, flags, i; DPRINTF("ntfs_unmount: unmounting...\n"); ntmp = VFSTONTFS(mp); flags = 0; if(mntflags & MNT_FORCE) flags |= FORCECLOSE; DPRINTF("ntfs_unmount: vflushing...\n"); error = vflush(mp,NULLVP,flags | SKIPSYSTEM); if (error) { DPRINTF("ntfs_unmount: vflush failed: %d\n", error); return (error); } /* Check if system vnodes are still referenced */ for(i=0;i<NTFS_SYSNODESNUM;i++) { if(((mntflags & MNT_FORCE) == 0) && (ntmp->ntm_sysvn[i] && ntmp->ntm_sysvn[i]->v_usecount > 1)) return (EBUSY); } /* Dereference all system vnodes */ for(i=0;i<NTFS_SYSNODESNUM;i++) if(ntmp->ntm_sysvn[i]) vrele(ntmp->ntm_sysvn[i]); /* vflush system vnodes */ error = vflush(mp,NULLVP,flags); if (error) { /* XXX should this be panic() ? */ printf("ntfs_unmount: vflush failed(sysnodes): %d\n",error); } /* Check if the type of device node isn't VBAD before * touching v_specinfo. If the device vnode is revoked, the * field is NULL and touching it causes null pointer derefercence. 
*/ if (ntmp->ntm_devvp->v_type != VBAD) ntmp->ntm_devvp->v_specmountpoint = NULL; /* lock the device vnode before calling VOP_CLOSE() */ vn_lock(ntmp->ntm_devvp, LK_EXCLUSIVE | LK_RETRY); vinvalbuf(ntmp->ntm_devvp, V_SAVE, NOCRED, p, 0, INFSLP); (void)VOP_CLOSE(ntmp->ntm_devvp, FREAD, NOCRED, p); vput(ntmp->ntm_devvp); /* free the toupper table, if this has been last mounted ntfs volume */ ntfs_toupper_unuse(p); DPRINTF("ntfs_unmount: freeing memory...\n"); free(ntmp->ntm_ad, M_NTFSMNT, 0); free(ntmp, M_NTFSMNT, 0); mp->mnt_data = NULL; mp->mnt_flag &= ~MNT_LOCAL; return (0); } int ntfs_root(struct mount *mp, struct vnode **vpp) { struct vnode *nvp; int error = 0; DPRINTF("ntfs_root(): sysvn: %p\n", VFSTONTFS(mp)->ntm_sysvn[NTFS_ROOTINO]); error = VFS_VGET(mp, (ino_t)NTFS_ROOTINO, &nvp); if(error) { printf("ntfs_root: VFS_VGET failed: %d\n",error); return (error); } *vpp = nvp; return (0); } /* * Do operations associated with quotas, not supported */ int ntfs_quotactl(struct mount *mp, int cmds, uid_t uid, caddr_t arg, struct proc *p) { return EOPNOTSUPP; } int ntfs_calccfree(struct ntfsmount *ntmp, cn_t *cfreep) { struct vnode *vp; u_int8_t *tmp; int j, error; cn_t cfree = 0; uint64_t bmsize, offset; size_t chunksize, i; vp = ntmp->ntm_sysvn[NTFS_BITMAPINO]; bmsize = VTOF(vp)->f_size; if (bmsize > 1024 * 1024) chunksize = 1024 * 1024; else chunksize = bmsize; tmp = malloc(chunksize, M_TEMP, M_WAITOK); for (offset = 0; offset < bmsize; offset += chunksize) { if (chunksize > bmsize - offset) chunksize = bmsize - offset; error = ntfs_readattr(ntmp, VTONT(vp), NTFS_A_DATA, NULL, offset, chunksize, tmp, NULL); if (error) goto out; for (i = 0; i < chunksize; i++) for (j = 0; j < 8; j++) if (~tmp[i] & (1 << j)) cfree++; } *cfreep = cfree; out: free(tmp, M_TEMP, 0); return(error); } int ntfs_statfs(struct mount *mp, struct statfs *sbp, struct proc *p) { struct ntfsmount *ntmp = VFSTONTFS(mp); u_int64_t mftallocated; DPRINTF("ntfs_statfs():\n"); mftallocated = VTOF(ntmp->ntm_sysvn[NTFS_MFTINO])->f_allocated; sbp->f_bsize = ntmp->ntm_bps; sbp->f_iosize = ntmp->ntm_bps * ntmp->ntm_spc; sbp->f_blocks = ntmp->ntm_bootfile.bf_spv; sbp->f_bfree = sbp->f_bavail = ntfs_cntobn(ntmp->ntm_cfree); sbp->f_ffree = sbp->f_favail = sbp->f_bfree / ntmp->ntm_bpmftrec; sbp->f_files = mftallocated / ntfs_bntob(ntmp->ntm_bpmftrec) + sbp->f_ffree; copy_statfs_info(sbp, mp); return (0); } int ntfs_sync(struct mount *mp, int waitfor, int stall, struct ucred *cred, struct proc *p) { /*DPRINTF("ntfs_sync():\n");*/ return (0); } int ntfs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp) { struct ntfid *ntfhp = (struct ntfid *)fhp; int error; DDPRINTF("ntfs_fhtovp(): %s: %u\n", mp->mnt_stat.f_mntonname, ntfhp->ntfid_ino); error = ntfs_vgetex(mp, ntfhp->ntfid_ino, ntfhp->ntfid_attr, NULL, LK_EXCLUSIVE | LK_RETRY, 0, vpp); /* XXX */ if (error != 0) { *vpp = NULLVP; return (error); } /* XXX as unlink/rmdir/mkdir/creat are not currently possible * with NTFS, we don't need to check anything else for now */ return (0); } int ntfs_vptofh(struct vnode *vp, struct fid *fhp) { struct ntnode *ntp; struct ntfid *ntfhp; struct fnode *fn; DDPRINTF("ntfs_fhtovp(): %s: %p\n", vp->v_mount->mnt_stat.f_mntonname, vp); fn = VTOF(vp); ntp = VTONT(vp); ntfhp = (struct ntfid *)fhp; ntfhp->ntfid_len = sizeof(struct ntfid); ntfhp->ntfid_ino = ntp->i_number; ntfhp->ntfid_attr = fn->f_attrtype; #ifdef notyet ntfhp->ntfid_gen = ntp->i_gen; #endif return (0); } int ntfs_vgetex(struct mount *mp, ntfsino_t ino, u_int32_t attrtype, char 
*attrname, u_long lkflags, u_long flags, struct vnode **vpp) { int error; struct ntfsmount *ntmp; struct ntnode *ip; struct fnode *fp; struct vnode *vp; enum vtype f_type; DPRINTF("ntfs_vgetex: ino: %u, attr: 0x%x:%s, lkf: 0x%lx, f: 0x%lx\n", ino, attrtype, attrname ? attrname : "", lkflags, flags); ntmp = VFSTONTFS(mp); *vpp = NULL; /* Get ntnode */ error = ntfs_ntlookup(ntmp, ino, &ip); if (error) { printf("ntfs_vget: ntfs_ntget failed\n"); return (error); } /* It may be not initialized fully, so force load it */ if (!(flags & VG_DONTLOADIN) && !(ip->i_flag & IN_LOADED)) { error = ntfs_loadntnode(ntmp, ip); if(error) { printf("ntfs_vget: CAN'T LOAD ATTRIBUTES FOR INO: %d\n", ip->i_number); ntfs_ntput(ip); return (error); } } error = ntfs_fget(ntmp, ip, attrtype, attrname, &fp); if (error) { printf("ntfs_vget: ntfs_fget failed\n"); ntfs_ntput(ip); return (error); } if (!(flags & VG_DONTVALIDFN) && !(fp->f_flag & FN_VALID)) { if ((ip->i_frflag & NTFS_FRFLAG_DIR) && (fp->f_attrtype == NTFS_A_DATA && fp->f_attrname == NULL)) { f_type = VDIR; } else if (flags & VG_EXT) { f_type = VNON; fp->f_size = fp->f_allocated = 0; } else { f_type = VREG; error = ntfs_filesize(ntmp, fp, &fp->f_size, &fp->f_allocated); if (error) { ntfs_ntput(ip); return (error); } } fp->f_flag |= FN_VALID; } /* * We may be calling vget() now. To avoid potential deadlock, we need * to release ntnode lock, since due to locking order vnode * lock has to be acquired first. * ntfs_fget() bumped ntnode usecount, so ntnode won't be recycled * prematurely. */ ntfs_ntput(ip); if (FTOV(fp)) { /* vget() returns error if the vnode has been recycled */ if (vget(FTOV(fp), lkflags) == 0) { *vpp = FTOV(fp); return (0); } } error = getnewvnode(VT_NTFS, ntmp->ntm_mountp, &ntfs_vops, &vp); if(error) { ntfs_frele(fp); ntfs_ntput(ip); return (error); } DPRINTF("ntfs_vget: vnode: %p for ntnode: %u\n", vp, ino); fp->f_vp = vp; vp->v_data = fp; vp->v_type = f_type; if (ino == NTFS_ROOTINO) vp->v_flag |= VROOT; if (lkflags & LK_TYPE_MASK) { error = vn_lock(vp, lkflags); if (error) { vput(vp); return (error); } } *vpp = vp; return (0); } int ntfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp) { if (ino > (ntfsino_t)-1) panic("ntfs_vget: alien ino_t %llu", (unsigned long long)ino); return ntfs_vgetex(mp, ino, NTFS_A_DATA, NULL, LK_EXCLUSIVE | LK_RETRY, 0, vpp); /* XXX */ } const struct vfsops ntfs_vfsops = { .vfs_mount = ntfs_mount, .vfs_start = ntfs_start, .vfs_unmount = ntfs_unmount, .vfs_root = ntfs_root, .vfs_quotactl = ntfs_quotactl, .vfs_statfs = ntfs_statfs, .vfs_sync = ntfs_sync, .vfs_vget = ntfs_vget, .vfs_fhtovp = ntfs_fhtovp, .vfs_vptofh = ntfs_vptofh, .vfs_init = ntfs_init, .vfs_sysctl = ntfs_sysctl, .vfs_checkexp = ntfs_checkexp, };
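/*
 * The ntfs_mountfs() code above decodes the boot block's MFT record size
 * field: a positive value counts clusters per MFT record, while a negative
 * value v means each record is 2^(-v) bytes; either way the mount stores the
 * result as sectors per record (ntm_bpmftrec).  A minimal standalone sketch
 * of that sign convention; the sector/cluster sizes below are illustrative
 * sample values, not read from a real volume.
 */
#include <stdint.h>
#include <stdio.h>

static unsigned
mft_record_sectors(int8_t mftrecsz, unsigned bps, unsigned spc)
{
	if (mftrecsz > 0)
		return spc * (unsigned)mftrecsz;	/* clusters per record */
	return (1u << -mftrecsz) / bps;			/* 2^(-v) bytes per record */
}

int
main(void)
{
	/* 512-byte sectors, 8 sectors per cluster: common example geometry */
	printf("%u\n", mft_record_sectors(-10, 512, 8));	/* 1024-byte records -> 2 sectors */
	printf("%u\n", mft_record_sectors(2, 512, 8));		/* 2 clusters -> 16 sectors */
	return 0;
}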
98 98 98 16 2 98 17 17 11 4 3 1 87 11 10 1 27 62 51 1 44 1 5 7 37 5 1 2 1 1 120 5 32 2 1 1 1 22 10 79 94 19 5 4 2 2 2 3 7 5 1 50 2 1 2 2 4 8 1 1 6 6 4 3 1 /* $OpenBSD: raw_ip.c,v 1.119 2019/02/04 21:40:52 bluhm Exp $ */ /* $NetBSD: raw_ip.c,v 1.25 1996/02/18 18:58:33 christos Exp $ */ /* * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 * * NRL grants permission for redistribution and use in source and binary * forms, with or without modification, of the software and documentation * created at NRL provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgements: * This product includes software developed by the University of * California, Berkeley and its contributors. * This product includes software developed at the Information * Technology Division, US Naval Research Laboratory. * 4. Neither the name of the NRL nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A * PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NRL OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * The views and conclusions contained in the software and documentation * are those of the authors and should not be interpreted as representing * official policies, either expressed or implied, of the US Naval * Research Laboratory (NRL). */ #include <sys/param.h> #include <sys/systm.h> #include <sys/mbuf.h> #include <sys/socket.h> #include <sys/protosw.h> #include <sys/socketvar.h> #include <net/if.h> #include <net/if_var.h> #include <net/route.h> #include <netinet/in.h> #include <netinet/ip.h> #include <netinet/ip_mroute.h> #include <netinet/ip_var.h> #include <netinet/in_pcb.h> #include <netinet/in_var.h> #include <netinet/ip_icmp.h> #include <net/pfvar.h> #include "pf.h" struct inpcbtable rawcbtable; /* * Nominal space allocated to a raw ip socket. */ #define RIPSNDQ 8192 #define RIPRCVQ 8192 /* * Raw interface to IP protocol. */ /* * Initialize raw connection block q. */ void rip_init(void) { in_pcbinit(&rawcbtable, 1); } struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET }; struct mbuf *rip_chkhdr(struct mbuf *, struct mbuf *); int rip_input(struct mbuf **mp, int *offp, int proto, int af) { struct mbuf *m = *mp; struct ip *ip = mtod(m, struct ip *); struct inpcb *inp, *last = NULL; struct in_addr *key; struct mbuf *opts = NULL; struct counters_ref ref; uint64_t *counters; KASSERT(af == AF_INET); ripsrc.sin_addr = ip->ip_src; key = &ip->ip_dst; #if NPF > 0 if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) { struct pf_divert *divert; divert = pf_find_divert(m); KASSERT(divert != NULL); switch (divert->type) { case PF_DIVERT_TO: key = &divert->addr.v4; break; case PF_DIVERT_REPLY: break; default: panic("%s: unknown divert type %d, mbuf %p, divert %p", __func__, divert->type, m, divert); } } #endif NET_ASSERT_LOCKED(); TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) { if (inp->inp_socket->so_state & SS_CANTRCVMORE) continue; #ifdef INET6 if (inp->inp_flags & INP_IPV6) continue; #endif if (rtable_l2(inp->inp_rtableid) != rtable_l2(m->m_pkthdr.ph_rtableid)) continue; if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != ip->ip_p) continue; if (inp->inp_laddr.s_addr && inp->inp_laddr.s_addr != key->s_addr) continue; if (inp->inp_faddr.s_addr && inp->inp_faddr.s_addr != ip->ip_src.s_addr) continue; if (last) { struct mbuf *n; if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) { if (last->inp_flags & INP_CONTROLOPTS || last->inp_socket->so_options & SO_TIMESTAMP) ip_savecontrol(last, &opts, ip, n); if (sbappendaddr(last->inp_socket, &last->inp_socket->so_rcv, sintosa(&ripsrc), n, opts) == 0) { /* should notify about lost packet */ m_freem(n); m_freem(opts); } else sorwakeup(last->inp_socket); opts = NULL; } } last = inp; } if (last) { if (last->inp_flags & INP_CONTROLOPTS || last->inp_socket->so_options & SO_TIMESTAMP) ip_savecontrol(last, &opts, ip, m); if (sbappendaddr(last->inp_socket, &last->inp_socket->so_rcv, sintosa(&ripsrc), m, opts) == 0) { m_freem(m); m_freem(opts); } else sorwakeup(last->inp_socket); } else { if (ip->ip_p != IPPROTO_ICMP) icmp_error(m, ICMP_UNREACH, 
ICMP_UNREACH_PROTOCOL, 0, 0); else m_freem(m); counters = counters_enter(&ref, ipcounters); counters[ips_noproto]++; counters[ips_delivered]--; counters_leave(&ref, ipcounters); } return IPPROTO_DONE; } /* * Generate IP header and pass packet to ip_output. * Tack on options user may have setup with control call. */ int rip_output(struct mbuf *m, struct socket *so, struct sockaddr *dstaddr, struct mbuf *control) { struct ip *ip; struct inpcb *inp; int flags, error; inp = sotoinpcb(so); flags = IP_ALLOWBROADCAST; /* * If the user handed us a complete IP packet, use it. * Otherwise, allocate an mbuf for a header and fill it in. */ if ((inp->inp_flags & INP_HDRINCL) == 0) { if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) { m_freem(m); return (EMSGSIZE); } M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); if (!m) return (ENOBUFS); ip = mtod(m, struct ip *); ip->ip_tos = inp->inp_ip.ip_tos; ip->ip_off = htons(0); ip->ip_p = inp->inp_ip.ip_p; ip->ip_len = htons(m->m_pkthdr.len); ip->ip_src = inp->inp_laddr; ip->ip_dst = satosin(dstaddr)->sin_addr; ip->ip_ttl = inp->inp_ip.ip_ttl ? inp->inp_ip.ip_ttl : MAXTTL; } else { if (m->m_pkthdr.len > IP_MAXPACKET) { m_freem(m); return (EMSGSIZE); } m = rip_chkhdr(m, inp->inp_options); if (m == NULL) return (EINVAL); ip = mtod(m, struct ip *); if (ip->ip_id == 0) ip->ip_id = htons(ip_randomid()); /* XXX prevent ip_output from overwriting header fields */ flags |= IP_RAWOUTPUT; ipstat_inc(ips_rawout); } #ifdef INET6 /* * A thought: Even though raw IP shouldn't be able to set IPv6 * multicast options, if it does, the last parameter to * ip_output should be guarded against v6/v4 problems. */ #endif /* force routing table */ m->m_pkthdr.ph_rtableid = inp->inp_rtableid; #if NPF > 0 if (inp->inp_socket->so_state & SS_ISCONNECTED && ip->ip_p != IPPROTO_ICMP) pf_mbuf_link_inpcb(m, inp); #endif error = ip_output(m, inp->inp_options, &inp->inp_route, flags, inp->inp_moptions, inp, 0); return (error); } struct mbuf * rip_chkhdr(struct mbuf *m, struct mbuf *options) { struct ip *ip; int hlen, opt, optlen, cnt; u_char *cp; if (m->m_pkthdr.len < sizeof(struct ip)) { m_freem(m); return NULL; } m = m_pullup(m, sizeof (struct ip)); if (m == NULL) return NULL; ip = mtod(m, struct ip *); hlen = ip->ip_hl << 2; /* Don't allow packet length sizes that will crash. */ if (hlen < sizeof (struct ip) || ntohs(ip->ip_len) < hlen || ntohs(ip->ip_len) != m->m_pkthdr.len) { m_freem(m); return NULL; } m = m_pullup(m, hlen); if (m == NULL) return NULL; ip = mtod(m, struct ip *); if (ip->ip_v != IPVERSION) { m_freem(m); return NULL; } /* * Don't allow both user specified and setsockopt options. * If options are present verify them. */ if (hlen != sizeof(struct ip)) { if (options) { m_freem(m); return NULL; } else { cp = (u_char *)(ip + 1); cnt = hlen - sizeof(struct ip); for (; cnt > 0; cnt -= optlen, cp += optlen) { opt = cp[IPOPT_OPTVAL]; if (opt == IPOPT_EOL) break; if (opt == IPOPT_NOP) optlen = 1; else { if (cnt < IPOPT_OLEN + sizeof(*cp)) { m_freem(m); return NULL; } optlen = cp[IPOPT_OLEN]; if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) { m_freem(m); return NULL; } } } } } return m; } /* * Raw IP socket option processing. 
*/ int rip_ctloutput(int op, struct socket *so, int level, int optname, struct mbuf *m) { struct inpcb *inp = sotoinpcb(so); int error; if (level != IPPROTO_IP) return (EINVAL); switch (optname) { case IP_HDRINCL: error = 0; if (op == PRCO_SETOPT) { if (m == NULL || m->m_len < sizeof (int)) error = EINVAL; else if (*mtod(m, int *)) inp->inp_flags |= INP_HDRINCL; else inp->inp_flags &= ~INP_HDRINCL; } else { m->m_len = sizeof(int); *mtod(m, int *) = inp->inp_flags & INP_HDRINCL; } return (error); case MRT_INIT: case MRT_DONE: case MRT_ADD_VIF: case MRT_DEL_VIF: case MRT_ADD_MFC: case MRT_DEL_MFC: case MRT_VERSION: case MRT_ASSERT: case MRT_API_SUPPORT: case MRT_API_CONFIG: #ifdef MROUTING switch (op) { case PRCO_SETOPT: error = ip_mrouter_set(so, optname, m); break; case PRCO_GETOPT: error = ip_mrouter_get(so, optname, m); break; default: error = EINVAL; break; } return (error); #else return (EOPNOTSUPP); #endif } return (ip_ctloutput(op, so, level, optname, m)); } u_long rip_sendspace = RIPSNDQ; u_long rip_recvspace = RIPRCVQ; /*ARGSUSED*/ int rip_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, struct mbuf *control, struct proc *p) { struct inpcb *inp; int error = 0; if (req == PRU_CONTROL) return (in_control(so, (u_long)m, (caddr_t)nam, (struct ifnet *)control)); soassertlocked(so); inp = sotoinpcb(so); if (inp == NULL) { error = EINVAL; goto release; } switch (req) { case PRU_DISCONNECT: if ((so->so_state & SS_ISCONNECTED) == 0) { error = ENOTCONN; break; } soisdisconnected(so); inp->inp_faddr.s_addr = INADDR_ANY; break; case PRU_ABORT: soisdisconnected(so); if (inp == NULL) panic("rip_abort"); #ifdef MROUTING if (so == ip_mrouter[inp->inp_rtableid]) ip_mrouter_done(so); #endif in_pcbdetach(inp); break; case PRU_BIND: { struct sockaddr_in *addr; if ((error = in_nam2sin(nam, &addr))) break; if (!((so->so_options & SO_BINDANY) || addr->sin_addr.s_addr == INADDR_ANY || addr->sin_addr.s_addr == INADDR_BROADCAST || in_broadcast(addr->sin_addr, inp->inp_rtableid) || ifa_ifwithaddr(sintosa(addr), inp->inp_rtableid))) { error = EADDRNOTAVAIL; break; } inp->inp_laddr = addr->sin_addr; break; } case PRU_CONNECT: { struct sockaddr_in *addr; if ((error = in_nam2sin(nam, &addr))) break; inp->inp_faddr = addr->sin_addr; soisconnected(so); break; } case PRU_CONNECT2: error = EOPNOTSUPP; break; /* * Mark the connection as being incapable of further input. */ case PRU_SHUTDOWN: socantsendmore(so); break; /* * Ship a packet out. The appropriate raw output * routine handles any massaging necessary. */ case PRU_SEND: { struct sockaddr_in dst; memset(&dst, 0, sizeof(dst)); dst.sin_family = AF_INET; dst.sin_len = sizeof(dst); if (so->so_state & SS_ISCONNECTED) { if (nam) { error = EISCONN; break; } dst.sin_addr = inp->inp_faddr; } else { struct sockaddr_in *addr; if (nam == NULL) { error = ENOTCONN; break; } if ((error = in_nam2sin(nam, &addr))) break; dst.sin_addr = addr->sin_addr; } #ifdef IPSEC /* XXX Find an IPsec TDB */ #endif error = rip_output(m, so, sintosa(&dst), NULL); m = NULL; break; } case PRU_SENSE: /* * stat: don't bother with a blocksize. */ break; /* * Not supported. 
*/ case PRU_LISTEN: case PRU_ACCEPT: case PRU_SENDOOB: case PRU_RCVD: case PRU_RCVOOB: error = EOPNOTSUPP; break; case PRU_SOCKADDR: in_setsockaddr(inp, nam); break; case PRU_PEERADDR: in_setpeeraddr(inp, nam); break; default: panic("rip_usrreq"); } release: if (req != PRU_RCVD && req != PRU_RCVOOB && req != PRU_SENSE) { m_freem(control); m_freem(m); } return (error); } int rip_attach(struct socket *so, int proto) { struct inpcb *inp; int error; if (so->so_pcb) panic("rip_attach"); if ((so->so_state & SS_PRIV) == 0) return EACCES; if (proto < 0 || proto >= IPPROTO_MAX) return EPROTONOSUPPORT; if ((error = soreserve(so, rip_sendspace, rip_recvspace))) return error; NET_ASSERT_LOCKED(); if ((error = in_pcballoc(so, &rawcbtable))) return error; inp = sotoinpcb(so); inp->inp_ip.ip_p = proto; return 0; } int rip_detach(struct socket *so) { struct inpcb *inp = sotoinpcb(so); soassertlocked(so); if (inp == NULL) return (EINVAL); #ifdef MROUTING if (so == ip_mrouter[inp->inp_rtableid]) ip_mrouter_done(so); #endif in_pcbdetach(inp); return (0); }
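/*
 * Userland view of the IP_HDRINCL handling above: by default rip_output()
 * prepends the IP header for a raw socket, but once IP_HDRINCL is set the
 * caller supplies a complete header and rip_chkhdr() only validates it.
 * A minimal sketch of turning the option on; raw sockets require superuser
 * privilege, as enforced by the SS_PRIV check in rip_attach().
 */
#include <sys/socket.h>
#include <netinet/in.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int s, one = 1;

	s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
	if (s == -1) {
		perror("socket");	/* EACCES without privilege */
		return 1;
	}
	/* From now on, data handed to sendto() must start with a full struct ip. */
	if (setsockopt(s, IPPROTO_IP, IP_HDRINCL, &one, sizeof(one)) == -1) {
		perror("setsockopt");
		close(s);
		return 1;
	}
	close(s);
	return 0;
}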
2 1 2 2 1 1 1 2 /* $OpenBSD: clock_subr.c,v 1.6 2016/08/26 07:09:56 guenther Exp $ */ /* $NetBSD: clock_subr.c,v 1.3 1997/03/15 18:11:16 is Exp $ */ /* * Copyright (c) 1988 University of Utah. * Copyright (c) 1982, 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: Utah $Hdr: clock.c 1.18 91/01/21$ * * @(#)clock.c 8.2 (Berkeley) 1/12/94 */ /* * Generic routines to convert between a POSIX date * (seconds since 1/1/1970) and yr/mo/day/hr/min/sec * Derived from arch/hp300/hp300/clock.c */ #include <sys/types.h> #include <sys/time.h> #include <sys/systm.h> static inline int leapyear(int year); #define FEBRUARY 2 #define days_in_year(a) (leapyear(a) ? 366 : 365) #define days_in_month(a) (month_days[(a) - 1]) static const int month_days[12] = { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }; /* * This inline avoids some unnecessary modulo operations * as compared with the usual macro: * ( ((year % 4) == 0 && * (year % 100) != 0) || * ((year % 400) == 0) ) * It is otherwise equivalent. */ static inline int leapyear(int year) { int rv = 0; if ((year & 3) == 0) { rv = 1; if ((year % 100) == 0) { rv = 0; if ((year % 400) == 0) rv = 1; } } return (rv); } time_t clock_ymdhms_to_secs(struct clock_ymdhms *dt) { time_t secs; int i, year, days; year = dt->dt_year; /* * Compute days since start of time. * First from years, then from months. */ days = 0; for (i = POSIX_BASE_YEAR; i < year; i++) days += days_in_year(i); if (leapyear(year) && dt->dt_mon > FEBRUARY) days++; /* Months */ for (i = 1; i < dt->dt_mon; i++) days += days_in_month(i); days += (dt->dt_day - 1); /* Add hours, minutes, seconds. 
*/ secs = (time_t)((days * 24 + dt->dt_hour) * 60 + dt->dt_min) * 60 + dt->dt_sec; return (secs); } /* This function uses a copy of month_days[] */ #undef days_in_month #define days_in_month(a) (mthdays[(a) - 1]) void clock_secs_to_ymdhms(time_t secs, struct clock_ymdhms *dt) { int mthdays[12]; int i, days; int rsec; /* remainder seconds */ memcpy(mthdays, month_days, sizeof(mthdays)); days = secs / SECDAY; rsec = secs % SECDAY; /* Day of week (Note: 1/1/1970 was a Thursday) */ dt->dt_wday = (days + 4) % 7; /* Subtract out whole years, counting them in i. */ for (i = POSIX_BASE_YEAR; days >= days_in_year(i); i++) days -= days_in_year(i); dt->dt_year = i; /* Subtract out whole months, counting them in i. */ if (leapyear(i)) days_in_month(FEBRUARY) = 29; for (i = 1; days >= days_in_month(i); i++) days -= days_in_month(i); dt->dt_mon = i; /* Days are what is left over (+1) from all that. */ dt->dt_day = days + 1; /* Hours, minutes, seconds are easy */ dt->dt_hour = rsec / 3600; rsec = rsec % 3600; dt->dt_min = rsec / 60; rsec = rsec % 60; dt->dt_sec = rsec; }
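/*
 * A standalone sketch of the conversion done by clock_ymdhms_to_secs()
 * above: whole years since 1970, a leap-day correction for the current year,
 * whole months, then days/hours/minutes/seconds.  The check value is a
 * well-known timestamp (2000-03-01 00:00:00 UTC == 951868800).
 */
#include <stdio.h>
#include <time.h>

static const int mdays[12] = { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 };

static int
leap(int y)
{
	return (y % 4 == 0 && y % 100 != 0) || y % 400 == 0;
}

static time_t
ymdhms_to_secs(int yr, int mon, int day, int hr, int min, int sec)
{
	long days = 0;
	int i;

	for (i = 1970; i < yr; i++)
		days += leap(i) ? 366 : 365;
	if (leap(yr) && mon > 2)		/* this year's leap day already passed */
		days++;
	for (i = 1; i < mon; i++)
		days += mdays[i - 1];
	days += day - 1;
	return ((time_t)(days * 24 + hr) * 60 + min) * 60 + sec;
}

int
main(void)
{
	printf("%lld\n", (long long)ymdhms_to_secs(2000, 3, 1, 0, 0, 0)); /* 951868800 */
	return 0;
}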
522 282 98 285 280 280 95 280 103 103 /* $OpenBSD: ufs_ihash.c,v 1.25 2021/03/11 13:31:36 jsg Exp $ */ /* $NetBSD: ufs_ihash.c,v 1.3 1996/02/09 22:36:04 christos Exp $ */ /* * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ufs_ihash.c 8.4 (Berkeley) 12/30/93 */ #include <sys/param.h> #include <sys/systm.h> #include <sys/vnode.h> #include <sys/malloc.h> #include <ufs/ufs/quota.h> #include <ufs/ufs/inode.h> #include <ufs/ufs/ufs_extern.h> #include <crypto/siphash.h> /* * Structures associated with inode caching. */ LIST_HEAD(ihashhead, inode) *ihashtbl; u_long ihash; /* size of hash table - 1 */ SIPHASH_KEY ihashkey; struct ihashhead *ufs_ihash(dev_t, ufsino_t); #define INOHASH(device, inum) ufs_ihash((device), (inum)) struct ihashhead * ufs_ihash(dev_t dev, ufsino_t inum) { SIPHASH_CTX ctx; SipHash24_Init(&ctx, &ihashkey); SipHash24_Update(&ctx, &dev, sizeof(dev)); SipHash24_Update(&ctx, &inum, sizeof(inum)); return (&ihashtbl[SipHash24_End(&ctx) & ihash]); } /* * Initialize inode hash table. */ void ufs_ihashinit(void) { ihashtbl = hashinit(initialvnodes, M_UFSMNT, M_WAITOK, &ihash); arc4random_buf(&ihashkey, sizeof(ihashkey)); } /* * Use the device/inum pair to find the incore inode, and return a pointer * to it. If it is in core, return it, even if it is locked. */ struct vnode * ufs_ihashlookup(dev_t dev, ufsino_t inum) { struct inode *ip; struct ihashhead *ipp; /* XXXLOCKING lock hash list */ ipp = INOHASH(dev, inum); LIST_FOREACH(ip, ipp, i_hash) { if (inum == ip->i_number && dev == ip->i_dev) break; } /* XXXLOCKING unlock hash list? */ if (ip) return (ITOV(ip)); return (NULLVP); } /* * Use the device/inum pair to find the incore inode, and return a pointer * to it. If it is in core, but locked, wait for it. 
*/ struct vnode * ufs_ihashget(dev_t dev, ufsino_t inum) { struct ihashhead *ipp; struct inode *ip; struct vnode *vp; loop: /* XXXLOCKING lock hash list */ ipp = INOHASH(dev, inum); LIST_FOREACH(ip, ipp, i_hash) { if (inum == ip->i_number && dev == ip->i_dev) { vp = ITOV(ip); /* XXXLOCKING unlock hash list? */ if (vget(vp, LK_EXCLUSIVE)) goto loop; return (vp); } } /* XXXLOCKING unlock hash list? */ return (NULL); } /* * Insert the inode into the hash table, and return it locked. */ int ufs_ihashins(struct inode *ip) { struct inode *curip; struct ihashhead *ipp; dev_t dev = ip->i_dev; ufsino_t inum = ip->i_number; /* lock the inode, then put it on the appropriate hash list */ rrw_enter(&ip->i_lock, RW_WRITE); /* XXXLOCKING lock hash list */ ipp = INOHASH(dev, inum); LIST_FOREACH(curip, ipp, i_hash) { if (inum == curip->i_number && dev == curip->i_dev) { /* XXXLOCKING unlock hash list? */ rrw_exit(&ip->i_lock); return (EEXIST); } } SET(ip->i_flag, IN_HASHED); LIST_INSERT_HEAD(ipp, ip, i_hash); /* XXXLOCKING unlock hash list? */ return (0); } /* * Remove the inode from the hash table. */ void ufs_ihashrem(struct inode *ip) { /* XXXLOCKING lock hash list */ if (ip->i_hash.le_prev == NULL) return; if (ISSET(ip->i_flag, IN_HASHED)) { LIST_REMOVE(ip, i_hash); CLR(ip->i_flag, IN_HASHED); } #ifdef DIAGNOSTIC ip->i_hash.le_next = NULL; ip->i_hash.le_prev = NULL; #endif /* XXXLOCKING unlock hash list? */ }
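/*
 * Sketch of the bucket selection done by ufs_ihash() above: the
 * (device, inode number) pair is run through a keyed hash and the result is
 * masked with ihash (the power-of-two table size minus one set up by
 * hashinit()).  The mixer below is only a toy stand-in for the kernel's
 * SipHash24; the point illustrated is the key-plus-mask pattern, not the
 * actual hash function.
 */
#include <stdint.h>
#include <stdio.h>

#define NBUCKETS 128			/* power of two, so mask = NBUCKETS - 1 */

static uint64_t
toy_keyed_hash(uint64_t key, uint64_t dev, uint64_t inum)
{
	uint64_t h = key ^ 0x9e3779b97f4a7c15ULL;

	h ^= dev;  h *= 0xbf58476d1ce4e5b9ULL;
	h ^= inum; h *= 0x94d049bb133111ebULL;
	h ^= h >> 31;
	return h;
}

int
main(void)
{
	uint64_t key = 0x0123456789abcdefULL;	/* per-boot random key in the kernel */
	unsigned bucket = (unsigned)(toy_keyed_hash(key, 4, 1234567) & (NBUCKETS - 1));

	printf("bucket %u\n", bucket);
	return 0;
}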
/* $OpenBSD: ip_ipcomp.h,v 1.11 2020/09/01 01:53:34 gnezdo Exp $ */ /* * Copyright (c) 2001 Jean-Jacques Bernard-Gundol (jj@wabbitt.org) * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* IP payload compression protocol (IPComp), see RFC 2393 */ #ifndef _NETINET_IP_IPCOMP_H_ #define _NETINET_IP_IPCOMP_H_ struct ipcompstat { uint64_t ipcomps_hdrops; /* Packet shorter than header shows */ uint64_t ipcomps_nopf; /* Protocol family not supported */ uint64_t ipcomps_notdb; uint64_t ipcomps_badkcr; uint64_t ipcomps_qfull; uint64_t ipcomps_noxform; uint64_t ipcomps_wrap; uint64_t ipcomps_input; /* Input IPcomp packets */ uint64_t ipcomps_output; /* Output IPcomp packets */ uint64_t ipcomps_invalid; /* Trying to use an invalid * TDB */ uint64_t ipcomps_ibytes; /* Input bytes */ uint64_t ipcomps_obytes; /* Output bytes */ uint64_t ipcomps_toobig; /* Packet got larger than * IP_MAXPACKET */ uint64_t ipcomps_pdrops; /* Packet blocked due to policy */ uint64_t ipcomps_crypto; /* "Crypto" processing failure */ uint64_t ipcomps_minlen; /* packets too short for compress */ uint64_t ipcomps_outfail; /* Packet output failure */ }; /* IPCOMP header */ struct ipcomp { u_int8_t ipcomp_nh; /* Next header */ u_int8_t ipcomp_flags; /* Flags: reserved field: 0 */ u_int16_t ipcomp_cpi; /* Compression Parameter Index, * Network order */ }; /* Length of IPCOMP header */ #define IPCOMP_HLENGTH 4 /* * Names for IPCOMP sysctl objects */ #define IPCOMPCTL_ENABLE 1 /* Enable COMP processing */ #define IPCOMPCTL_STATS 2 /* COMP stats */ #define IPCOMPCTL_MAXID 3 #define IPCOMPCTL_NAMES { \ { 0, 0 }, \ { "enable", CTLTYPE_INT }, \ { "stats", CTLTYPE_STRUCT }, \ } #ifdef _KERNEL #include <sys/percpu.h> enum ipcomp_counters { ipcomps_hdrops, /* Packet shorter than header shows */ ipcomps_nopf, /* Protocol family not supported */ ipcomps_notdb, ipcomps_badkcr, ipcomps_qfull, ipcomps_noxform, ipcomps_wrap, ipcomps_input, /* Input IPcomp packets */ ipcomps_output, /* Output IPcomp packets */ ipcomps_invalid, /* Trying to use an invalid * TDB */ ipcomps_ibytes, /* Input bytes */ ipcomps_obytes, /* Output bytes */ ipcomps_toobig, /* Packet got larger than * IP_MAXPACKET */ ipcomps_pdrops, /* Packet blocked due to policy */ 
ipcomps_crypto, /* "Crypto" processing failure */ ipcomps_minlen, /* packets too short for compress */ ipcomps_outfail, /* Packet output failure */ ipcomps_ncounters }; extern struct cpumem *ipcompcounters; static inline void ipcompstat_inc(enum ipcomp_counters c) { counters_inc(ipcompcounters, c); } static inline void ipcompstat_add(enum ipcomp_counters c, uint64_t v) { counters_add(ipcompcounters, c, v); } extern int ipcomp_enable; #endif /* _KERNEL */ #endif /* _NETINET_IP_IPCOMP_H_ */
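/*
 * The struct ipcomp declared above is the 4-byte RFC 2393 header that
 * precedes the compressed payload: next-header, a reserved flags octet, and
 * a 16-bit Compression Parameter Index in network byte order.  A small
 * standalone sketch that fills one in; the struct is a local copy and the
 * CPI/next-header values are arbitrary illustrations.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

struct ipcomp_hdr {
	uint8_t  nh;		/* next header */
	uint8_t  flags;		/* reserved, must be 0 */
	uint16_t cpi;		/* Compression Parameter Index, network order */
};

static void
ipcomp_fill(void *buf, uint8_t next_header, uint16_t cpi)
{
	struct ipcomp_hdr h;

	h.nh = next_header;
	h.flags = 0;
	h.cpi = htons(cpi);
	memcpy(buf, &h, sizeof(h));	/* 4 bytes on the wire (IPCOMP_HLENGTH) */
}

int
main(void)
{
	unsigned char buf[4];

	ipcomp_fill(buf, 4 /* e.g. IPv4 as the compressed protocol */, 0x1234);
	printf("%02x %02x %02x %02x\n", buf[0], buf[1], buf[2], buf[3]);
	return 0;
}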
112 /* * Copyright 2019 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. * */ #include "amdgpu.h" #include "amdgpu_smu.h" #include "smu_internal.h" #include "smu_v12_0_ppsmc.h" #include "smu12_driver_if.h" #include "smu_v12_0.h" #include "renoir_ppt.h" #define CLK_MAP(clk, index) \ [SMU_##clk] = {1, (index)} #define MSG_MAP(msg, index) \ [SMU_MSG_##msg] = {1, (index)} #define TAB_MAP_VALID(tab) \ [SMU_TABLE_##tab] = {1, TABLE_##tab} #define TAB_MAP_INVALID(tab) \ [SMU_TABLE_##tab] = {0, TABLE_##tab} static struct smu_12_0_cmn2aisc_mapping renoir_message_map[SMU_MSG_MAX_COUNT] = { MSG_MAP(TestMessage, PPSMC_MSG_TestMessage), MSG_MAP(GetSmuVersion, PPSMC_MSG_GetSmuVersion), MSG_MAP(GetDriverIfVersion, PPSMC_MSG_GetDriverIfVersion), MSG_MAP(PowerUpGfx, PPSMC_MSG_PowerUpGfx), MSG_MAP(AllowGfxOff, PPSMC_MSG_EnableGfxOff), MSG_MAP(DisallowGfxOff, PPSMC_MSG_DisableGfxOff), MSG_MAP(PowerDownIspByTile, PPSMC_MSG_PowerDownIspByTile), MSG_MAP(PowerUpIspByTile, PPSMC_MSG_PowerUpIspByTile), MSG_MAP(PowerDownVcn, PPSMC_MSG_PowerDownVcn), MSG_MAP(PowerUpVcn, PPSMC_MSG_PowerUpVcn), MSG_MAP(PowerDownSdma, PPSMC_MSG_PowerDownSdma), MSG_MAP(PowerUpSdma, PPSMC_MSG_PowerUpSdma), MSG_MAP(SetHardMinIspclkByFreq, PPSMC_MSG_SetHardMinIspclkByFreq), MSG_MAP(SetHardMinVcn, PPSMC_MSG_SetHardMinVcn), MSG_MAP(Spare1, PPSMC_MSG_spare1), MSG_MAP(Spare2, PPSMC_MSG_spare2), MSG_MAP(SetAllowFclkSwitch, PPSMC_MSG_SetAllowFclkSwitch), MSG_MAP(SetMinVideoGfxclkFreq, PPSMC_MSG_SetMinVideoGfxclkFreq), MSG_MAP(ActiveProcessNotify, PPSMC_MSG_ActiveProcessNotify), MSG_MAP(SetCustomPolicy, PPSMC_MSG_SetCustomPolicy), MSG_MAP(SetVideoFps, PPSMC_MSG_SetVideoFps), MSG_MAP(NumOfDisplays, PPSMC_MSG_SetDisplayCount), MSG_MAP(QueryPowerLimit, PPSMC_MSG_QueryPowerLimit), MSG_MAP(SetDriverDramAddrHigh, PPSMC_MSG_SetDriverDramAddrHigh), MSG_MAP(SetDriverDramAddrLow, PPSMC_MSG_SetDriverDramAddrLow), MSG_MAP(TransferTableSmu2Dram, PPSMC_MSG_TransferTableSmu2Dram), MSG_MAP(TransferTableDram2Smu, PPSMC_MSG_TransferTableDram2Smu), MSG_MAP(GfxDeviceDriverReset, PPSMC_MSG_GfxDeviceDriverReset), MSG_MAP(SetGfxclkOverdriveByFreqVid, PPSMC_MSG_SetGfxclkOverdriveByFreqVid), MSG_MAP(SetHardMinDcfclkByFreq, PPSMC_MSG_SetHardMinDcfclkByFreq), MSG_MAP(SetHardMinSocclkByFreq, PPSMC_MSG_SetHardMinSocclkByFreq), MSG_MAP(ControlIgpuATS, PPSMC_MSG_ControlIgpuATS), MSG_MAP(SetMinVideoFclkFreq, PPSMC_MSG_SetMinVideoFclkFreq), MSG_MAP(SetMinDeepSleepDcfclk, PPSMC_MSG_SetMinDeepSleepDcfclk), MSG_MAP(ForcePowerDownGfx, 
PPSMC_MSG_ForcePowerDownGfx), MSG_MAP(SetPhyclkVoltageByFreq, PPSMC_MSG_SetPhyclkVoltageByFreq), MSG_MAP(SetDppclkVoltageByFreq, PPSMC_MSG_SetDppclkVoltageByFreq), MSG_MAP(SetSoftMinVcn, PPSMC_MSG_SetSoftMinVcn), MSG_MAP(EnablePostCode, PPSMC_MSG_EnablePostCode), MSG_MAP(GetGfxclkFrequency, PPSMC_MSG_GetGfxclkFrequency), MSG_MAP(GetFclkFrequency, PPSMC_MSG_GetFclkFrequency), MSG_MAP(GetMinGfxclkFrequency, PPSMC_MSG_GetMinGfxclkFrequency), MSG_MAP(GetMaxGfxclkFrequency, PPSMC_MSG_GetMaxGfxclkFrequency), MSG_MAP(SoftReset, PPSMC_MSG_SoftReset), MSG_MAP(SetGfxCGPG, PPSMC_MSG_SetGfxCGPG), MSG_MAP(SetSoftMaxGfxClk, PPSMC_MSG_SetSoftMaxGfxClk), MSG_MAP(SetHardMinGfxClk, PPSMC_MSG_SetHardMinGfxClk), MSG_MAP(SetSoftMaxSocclkByFreq, PPSMC_MSG_SetSoftMaxSocclkByFreq), MSG_MAP(SetSoftMaxFclkByFreq, PPSMC_MSG_SetSoftMaxFclkByFreq), MSG_MAP(SetSoftMaxVcn, PPSMC_MSG_SetSoftMaxVcn), MSG_MAP(PowerGateMmHub, PPSMC_MSG_PowerGateMmHub), MSG_MAP(UpdatePmeRestore, PPSMC_MSG_UpdatePmeRestore), MSG_MAP(GpuChangeState, PPSMC_MSG_GpuChangeState), MSG_MAP(SetPowerLimitPercentage, PPSMC_MSG_SetPowerLimitPercentage), MSG_MAP(ForceGfxContentSave, PPSMC_MSG_ForceGfxContentSave), MSG_MAP(EnableTmdp48MHzRefclkPwrDown, PPSMC_MSG_EnableTmdp48MHzRefclkPwrDown), MSG_MAP(PowerDownJpeg, PPSMC_MSG_PowerDownJpeg), MSG_MAP(PowerUpJpeg, PPSMC_MSG_PowerUpJpeg), MSG_MAP(PowerGateAtHub, PPSMC_MSG_PowerGateAtHub), MSG_MAP(SetSoftMinJpeg, PPSMC_MSG_SetSoftMinJpeg), MSG_MAP(SetHardMinFclkByFreq, PPSMC_MSG_SetHardMinFclkByFreq), }; static struct smu_12_0_cmn2aisc_mapping renoir_clk_map[SMU_CLK_COUNT] = { CLK_MAP(GFXCLK, CLOCK_GFXCLK), CLK_MAP(SCLK, CLOCK_GFXCLK), CLK_MAP(SOCCLK, CLOCK_SOCCLK), CLK_MAP(UCLK, CLOCK_FCLK), CLK_MAP(MCLK, CLOCK_FCLK), }; static struct smu_12_0_cmn2aisc_mapping renoir_table_map[SMU_TABLE_COUNT] = { TAB_MAP_VALID(WATERMARKS), TAB_MAP_INVALID(CUSTOM_DPM), TAB_MAP_VALID(DPMCLOCKS), TAB_MAP_VALID(SMU_METRICS), }; static int renoir_get_smu_msg_index(struct smu_context *smc, uint32_t index) { struct smu_12_0_cmn2aisc_mapping mapping; if (index >= SMU_MSG_MAX_COUNT) return -EINVAL; mapping = renoir_message_map[index]; if (!(mapping.valid_mapping)) return -EINVAL; return mapping.map_to; } static int renoir_get_smu_clk_index(struct smu_context *smc, uint32_t index) { struct smu_12_0_cmn2aisc_mapping mapping; if (index >= SMU_CLK_COUNT) return -EINVAL; mapping = renoir_clk_map[index]; if (!(mapping.valid_mapping)) { return -EINVAL; } return mapping.map_to; } static int renoir_get_smu_table_index(struct smu_context *smc, uint32_t index) { struct smu_12_0_cmn2aisc_mapping mapping; if (index >= SMU_TABLE_COUNT) return -EINVAL; mapping = renoir_table_map[index]; if (!(mapping.valid_mapping)) return -EINVAL; return mapping.map_to; } static int renoir_get_metrics_table(struct smu_context *smu, SmuMetrics_t *metrics_table) { struct smu_table_context *smu_table= &smu->smu_table; int ret = 0; mutex_lock(&smu->metrics_lock); if (!smu_table->metrics_time || time_after(jiffies, smu_table->metrics_time + msecs_to_jiffies(100))) { ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, 0, (void *)smu_table->metrics_table, false); if (ret) { pr_info("Failed to export SMU metrics table!\n"); mutex_unlock(&smu->metrics_lock); return ret; } smu_table->metrics_time = jiffies; } memcpy(metrics_table, smu_table->metrics_table, sizeof(SmuMetrics_t)); mutex_unlock(&smu->metrics_lock); return ret; } static int renoir_tables_init(struct smu_context *smu, struct smu_table *tables) { struct smu_table_context *smu_table = &smu->smu_table; 
SMU_TABLE_INIT(tables, SMU_TABLE_WATERMARKS, sizeof(Watermarks_t), PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); SMU_TABLE_INIT(tables, SMU_TABLE_DPMCLOCKS, sizeof(DpmClocks_t), PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, sizeof(SmuMetrics_t), PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); smu_table->clocks_table = kzalloc(sizeof(DpmClocks_t), GFP_KERNEL); if (!smu_table->clocks_table) return -ENOMEM; smu_table->metrics_table = kzalloc(sizeof(SmuMetrics_t), GFP_KERNEL); if (!smu_table->metrics_table) return -ENOMEM; smu_table->metrics_time = 0; smu_table->watermarks_table = kzalloc(sizeof(Watermarks_t), GFP_KERNEL); if (!smu_table->watermarks_table) return -ENOMEM; return 0; } /** * This interface just for getting uclk ultimate freq and should't introduce * other likewise function result in overmuch callback. */ static int renoir_get_dpm_clk_limited(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t dpm_level, uint32_t *freq) { DpmClocks_t *clk_table = smu->smu_table.clocks_table; if (!clk_table || clk_type >= SMU_CLK_COUNT) return -EINVAL; GET_DPM_CUR_FREQ(clk_table, clk_type, dpm_level, *freq); return 0; } static int renoir_print_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, char *buf) { int i, size = 0, ret = 0; uint32_t cur_value = 0, value = 0, count = 0, min = 0, max = 0; DpmClocks_t *clk_table = smu->smu_table.clocks_table; SmuMetrics_t metrics; bool cur_value_match_level = false; if (!clk_table || clk_type >= SMU_CLK_COUNT) return -EINVAL; memset(&metrics, 0, sizeof(metrics)); ret = renoir_get_metrics_table(smu, &metrics); if (ret) return ret; switch (clk_type) { case SMU_GFXCLK: case SMU_SCLK: /* retirve table returned paramters unit is MHz */ cur_value = metrics.ClockFrequency[CLOCK_GFXCLK]; ret = smu_get_dpm_freq_range(smu, SMU_GFXCLK, &min, &max, false); if (!ret) { /* driver only know min/max gfx_clk, Add level 1 for all other gfx clks */ if (cur_value == max) i = 2; else if (cur_value == min) i = 0; else i = 1; size += snprintf(buf + size, PAGE_SIZE - size, "0: %uMhz %s\n", min, i == 0 ? "*" : ""); size += snprintf(buf + size, PAGE_SIZE - size, "1: %uMhz %s\n", i == 1 ? cur_value : RENOIR_UMD_PSTATE_GFXCLK, i == 1 ? "*" : ""); size += snprintf(buf + size, PAGE_SIZE - size, "2: %uMhz %s\n", max, i == 2 ? "*" : ""); } return size; case SMU_SOCCLK: count = NUM_SOCCLK_DPM_LEVELS; cur_value = metrics.ClockFrequency[CLOCK_SOCCLK]; break; case SMU_MCLK: count = NUM_MEMCLK_DPM_LEVELS; cur_value = metrics.ClockFrequency[CLOCK_FCLK]; break; case SMU_DCEFCLK: count = NUM_DCFCLK_DPM_LEVELS; cur_value = metrics.ClockFrequency[CLOCK_DCFCLK]; break; case SMU_FCLK: count = NUM_FCLK_DPM_LEVELS; cur_value = metrics.ClockFrequency[CLOCK_FCLK]; break; default: return -EINVAL; } for (i = 0; i < count; i++) { GET_DPM_CUR_FREQ(clk_table, clk_type, i, value); size += snprintf(buf + size, PAGE_SIZE - size, "%d: %uMhz %s\n", i, value, cur_value == value ? 
"*" : ""); if (cur_value == value) cur_value_match_level = true; } if (!cur_value_match_level) size += snprintf(buf + size, PAGE_SIZE - size, " %uMhz *\n", cur_value); return size; } static enum amd_pm_state_type renoir_get_current_power_state(struct smu_context *smu) { enum amd_pm_state_type pm_type; struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); if (!smu_dpm_ctx->dpm_context || !smu_dpm_ctx->dpm_current_power_state) return -EINVAL; switch (smu_dpm_ctx->dpm_current_power_state->classification.ui_label) { case SMU_STATE_UI_LABEL_BATTERY: pm_type = POWER_STATE_TYPE_BATTERY; break; case SMU_STATE_UI_LABEL_BALLANCED: pm_type = POWER_STATE_TYPE_BALANCED; break; case SMU_STATE_UI_LABEL_PERFORMANCE: pm_type = POWER_STATE_TYPE_PERFORMANCE; break; default: if (smu_dpm_ctx->dpm_current_power_state->classification.flags & SMU_STATE_CLASSIFICATION_FLAG_BOOT) pm_type = POWER_STATE_TYPE_INTERNAL_BOOT; else pm_type = POWER_STATE_TYPE_DEFAULT; break; } return pm_type; } static int renoir_dpm_set_uvd_enable(struct smu_context *smu, bool enable) { struct smu_power_context *smu_power = &smu->smu_power; struct smu_power_gate *power_gate = &smu_power->power_gate; int ret = 0; if (enable) { /* vcn dpm on is a prerequisite for vcn power gate messages */ if (smu_feature_is_enabled(smu, SMU_FEATURE_VCN_PG_BIT)) { ret = smu_send_smc_msg_with_param(smu, SMU_MSG_PowerUpVcn, 0, NULL); if (ret) return ret; } power_gate->vcn_gated = false; } else { if (smu_feature_is_enabled(smu, SMU_FEATURE_VCN_PG_BIT)) { ret = smu_send_smc_msg(smu, SMU_MSG_PowerDownVcn, NULL); if (ret) return ret; } power_gate->vcn_gated = true; } return ret; } static int renoir_dpm_set_jpeg_enable(struct smu_context *smu, bool enable) { struct smu_power_context *smu_power = &smu->smu_power; struct smu_power_gate *power_gate = &smu_power->power_gate; int ret = 0; if (enable) { if (smu_feature_is_enabled(smu, SMU_FEATURE_JPEG_PG_BIT)) { ret = smu_send_smc_msg_with_param(smu, SMU_MSG_PowerUpJpeg, 0, NULL); if (ret) return ret; } power_gate->jpeg_gated = false; } else { if (smu_feature_is_enabled(smu, SMU_FEATURE_JPEG_PG_BIT)) { ret = smu_send_smc_msg_with_param(smu, SMU_MSG_PowerDownJpeg, 0, NULL); if (ret) return ret; } power_gate->jpeg_gated = true; } return ret; } static int renoir_get_current_clk_freq_by_table(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t *value) { int ret = 0, clk_id = 0; SmuMetrics_t metrics; ret = renoir_get_metrics_table(smu, &metrics); if (ret) return ret; clk_id = smu_clk_get_index(smu, clk_type); if (clk_id < 0) return clk_id; *value = metrics.ClockFrequency[clk_id]; return ret; } static int renoir_force_dpm_limit_value(struct smu_context *smu, bool highest) { int ret = 0, i = 0; uint32_t min_freq, max_freq, force_freq; enum smu_clk_type clk_type; enum smu_clk_type clks[] = { SMU_GFXCLK, SMU_MCLK, SMU_SOCCLK, }; for (i = 0; i < ARRAY_SIZE(clks); i++) { clk_type = clks[i]; ret = smu_get_dpm_freq_range(smu, clk_type, &min_freq, &max_freq, false); if (ret) return ret; force_freq = highest ? 
max_freq : min_freq; ret = smu_set_soft_freq_range(smu, clk_type, force_freq, force_freq, false); if (ret) return ret; } return ret; } static int renoir_unforce_dpm_levels(struct smu_context *smu) { int ret = 0, i = 0; uint32_t min_freq, max_freq; enum smu_clk_type clk_type; struct clk_feature_map { enum smu_clk_type clk_type; uint32_t feature; } clk_feature_map[] = { {SMU_GFXCLK, SMU_FEATURE_DPM_GFXCLK_BIT}, {SMU_MCLK, SMU_FEATURE_DPM_UCLK_BIT}, {SMU_SOCCLK, SMU_FEATURE_DPM_SOCCLK_BIT}, }; for (i = 0; i < ARRAY_SIZE(clk_feature_map); i++) { if (!smu_feature_is_enabled(smu, clk_feature_map[i].feature)) continue; clk_type = clk_feature_map[i].clk_type; ret = smu_get_dpm_freq_range(smu, clk_type, &min_freq, &max_freq, false); if (ret) return ret; ret = smu_set_soft_freq_range(smu, clk_type, min_freq, max_freq, false); if (ret) return ret; } return ret; } static int renoir_get_gpu_temperature(struct smu_context *smu, uint32_t *value) { int ret = 0; SmuMetrics_t metrics; if (!value) return -EINVAL; ret = renoir_get_metrics_table(smu, &metrics); if (ret) return ret; *value = (metrics.GfxTemperature / 100) * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; return 0; } static int renoir_get_current_activity_percent(struct smu_context *smu, enum amd_pp_sensors sensor, uint32_t *value) { int ret = 0; SmuMetrics_t metrics; if (!value) return -EINVAL; ret = renoir_get_metrics_table(smu, &metrics); if (ret) return ret; switch (sensor) { case AMDGPU_PP_SENSOR_GPU_LOAD: *value = metrics.AverageGfxActivity / 100; break; default: pr_err("Invalid sensor for retrieving clock activity\n"); return -EINVAL; } return 0; } static int renoir_get_workload_type(struct smu_context *smu, uint32_t profile) { uint32_t pplib_workload = 0; switch (profile) { case PP_SMC_POWER_PROFILE_FULLSCREEN3D: pplib_workload = WORKLOAD_PPLIB_FULL_SCREEN_3D_BIT; break; case PP_SMC_POWER_PROFILE_CUSTOM: pplib_workload = WORKLOAD_PPLIB_COUNT; break; case PP_SMC_POWER_PROFILE_VIDEO: pplib_workload = WORKLOAD_PPLIB_VIDEO_BIT; break; case PP_SMC_POWER_PROFILE_VR: pplib_workload = WORKLOAD_PPLIB_VR_BIT; break; case PP_SMC_POWER_PROFILE_COMPUTE: pplib_workload = WORKLOAD_PPLIB_COMPUTE_BIT; break; default: return -EINVAL; } return pplib_workload; } static int renoir_get_profiling_clk_mask(struct smu_context *smu, enum amd_dpm_forced_level level, uint32_t *sclk_mask, uint32_t *mclk_mask, uint32_t *soc_mask) { if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK) { if (sclk_mask) *sclk_mask = 0; } else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK) { if (mclk_mask) *mclk_mask = 0; } else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) { if(sclk_mask) /* The sclk as gfxclk and has three level about max/min/current */ *sclk_mask = 3 - 1; if(mclk_mask) *mclk_mask = NUM_MEMCLK_DPM_LEVELS - 1; if(soc_mask) *soc_mask = NUM_SOCCLK_DPM_LEVELS - 1; } return 0; } /** * This interface get dpm clock table for dc */ static int renoir_get_dpm_clock_table(struct smu_context *smu, struct dpm_clocks *clock_table) { DpmClocks_t *table = smu->smu_table.clocks_table; int i; if (!clock_table || !table) return -EINVAL; for (i = 0; i < NUM_DCFCLK_DPM_LEVELS; i++) { clock_table->DcfClocks[i].Freq = table->DcfClocks[i].Freq; clock_table->DcfClocks[i].Vol = table->DcfClocks[i].Vol; } for (i = 0; i < NUM_SOCCLK_DPM_LEVELS; i++) { clock_table->SocClocks[i].Freq = table->SocClocks[i].Freq; clock_table->SocClocks[i].Vol = table->SocClocks[i].Vol; } for (i = 0; i < NUM_FCLK_DPM_LEVELS; i++) { clock_table->FClocks[i].Freq = table->FClocks[i].Freq; clock_table->FClocks[i].Vol = 
table->FClocks[i].Vol; } for (i = 0; i< NUM_MEMCLK_DPM_LEVELS; i++) { clock_table->MemClocks[i].Freq = table->MemClocks[i].Freq; clock_table->MemClocks[i].Vol = table->MemClocks[i].Vol; } return 0; } static int renoir_force_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t mask) { int ret = 0 ; uint32_t soft_min_level = 0, soft_max_level = 0, min_freq = 0, max_freq = 0; DpmClocks_t *clk_table = smu->smu_table.clocks_table; soft_min_level = mask ? (ffs(mask) - 1) : 0; soft_max_level = mask ? (fls(mask) - 1) : 0; switch (clk_type) { case SMU_GFXCLK: case SMU_SCLK: if (soft_min_level > 2 || soft_max_level > 2) { pr_info("Currently sclk only support 3 levels on APU\n"); return -EINVAL; } ret = smu_get_dpm_freq_range(smu, SMU_GFXCLK, &min_freq, &max_freq, false); if (ret) return ret; ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxGfxClk, soft_max_level == 0 ? min_freq : soft_max_level == 1 ? RENOIR_UMD_PSTATE_GFXCLK : max_freq, NULL); if (ret) return ret; ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetHardMinGfxClk, soft_min_level == 2 ? max_freq : soft_min_level == 1 ? RENOIR_UMD_PSTATE_GFXCLK : min_freq, NULL); if (ret) return ret; break; case SMU_SOCCLK: GET_DPM_CUR_FREQ(clk_table, clk_type, soft_min_level, min_freq); GET_DPM_CUR_FREQ(clk_table, clk_type, soft_max_level, max_freq); ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxSocclkByFreq, max_freq, NULL); if (ret) return ret; ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetHardMinSocclkByFreq, min_freq, NULL); if (ret) return ret; break; case SMU_MCLK: case SMU_FCLK: GET_DPM_CUR_FREQ(clk_table, clk_type, soft_min_level, min_freq); GET_DPM_CUR_FREQ(clk_table, clk_type, soft_max_level, max_freq); ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxFclkByFreq, max_freq, NULL); if (ret) return ret; ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetHardMinFclkByFreq, min_freq, NULL); if (ret) return ret; break; default: break; } return ret; } static int renoir_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size) { int workload_type, ret; uint32_t profile_mode = input[size]; if (profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) { pr_err("Invalid power profile mode %d\n", smu->power_profile_mode); return -EINVAL; } /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ workload_type = smu_workload_get_type(smu, smu->power_profile_mode); if (workload_type < 0) { /* * TODO: If some case need switch to powersave/default power mode * then can consider enter WORKLOAD_COMPUTE/WORKLOAD_CUSTOM for power saving. 
*/ pr_err_once("Unsupported power profile mode %d on RENOIR\n",smu->power_profile_mode); return -EINVAL; } ret = smu_send_smc_msg_with_param(smu, SMU_MSG_ActiveProcessNotify, 1 << workload_type, NULL); if (ret) { pr_err_once("Fail to set workload type %d\n", workload_type); return ret; } smu->power_profile_mode = profile_mode; return 0; } static int renoir_set_peak_clock_by_device(struct smu_context *smu) { int ret = 0; uint32_t sclk_freq = 0, uclk_freq = 0; ret = smu_get_dpm_freq_range(smu, SMU_SCLK, NULL, &sclk_freq, false); if (ret) return ret; ret = smu_set_soft_freq_range(smu, SMU_SCLK, sclk_freq, sclk_freq, false); if (ret) return ret; ret = smu_get_dpm_freq_range(smu, SMU_UCLK, NULL, &uclk_freq, false); if (ret) return ret; ret = smu_set_soft_freq_range(smu, SMU_UCLK, uclk_freq, uclk_freq, false); if (ret) return ret; return ret; } static int renoir_set_performance_level(struct smu_context *smu, enum amd_dpm_forced_level level) { int ret = 0; uint32_t sclk_mask, mclk_mask, soc_mask; switch (level) { case AMD_DPM_FORCED_LEVEL_HIGH: ret = smu_force_dpm_limit_value(smu, true); break; case AMD_DPM_FORCED_LEVEL_LOW: ret = smu_force_dpm_limit_value(smu, false); break; case AMD_DPM_FORCED_LEVEL_AUTO: case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD: ret = smu_unforce_dpm_levels(smu); break; case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK: case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK: ret = smu_get_profiling_clk_mask(smu, level, &sclk_mask, &mclk_mask, &soc_mask); if (ret) return ret; smu_force_clk_levels(smu, SMU_SCLK, 1 << sclk_mask, false); smu_force_clk_levels(smu, SMU_MCLK, 1 << mclk_mask, false); smu_force_clk_levels(smu, SMU_SOCCLK, 1 << soc_mask, false); break; case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK: ret = renoir_set_peak_clock_by_device(smu); break; case AMD_DPM_FORCED_LEVEL_MANUAL: case AMD_DPM_FORCED_LEVEL_PROFILE_EXIT: default: break; } return ret; } /* save watermark settings into pplib smu structure, * also pass data to smu controller */ static int renoir_set_watermarks_table( struct smu_context *smu, void *watermarks, struct dm_pp_wm_sets_with_clock_ranges_soc15 *clock_ranges) { int i; int ret = 0; Watermarks_t *table = watermarks; if (!table || !clock_ranges) return -EINVAL; if (clock_ranges->num_wm_dmif_sets > 4 || clock_ranges->num_wm_mcif_sets > 4) return -EINVAL; /* save into smu->smu_table.tables[SMU_TABLE_WATERMARKS]->cpu_addr*/ for (i = 0; i < clock_ranges->num_wm_dmif_sets; i++) { table->WatermarkRow[WM_DCFCLK][i].MinClock = cpu_to_le16((uint16_t) (clock_ranges->wm_dmif_clocks_ranges[i].wm_min_dcfclk_clk_in_khz)); table->WatermarkRow[WM_DCFCLK][i].MaxClock = cpu_to_le16((uint16_t) (clock_ranges->wm_dmif_clocks_ranges[i].wm_max_dcfclk_clk_in_khz)); table->WatermarkRow[WM_DCFCLK][i].MinMclk = cpu_to_le16((uint16_t) (clock_ranges->wm_dmif_clocks_ranges[i].wm_min_mem_clk_in_khz)); table->WatermarkRow[WM_DCFCLK][i].MaxMclk = cpu_to_le16((uint16_t) (clock_ranges->wm_dmif_clocks_ranges[i].wm_max_mem_clk_in_khz)); table->WatermarkRow[WM_DCFCLK][i].WmSetting = (uint8_t) clock_ranges->wm_dmif_clocks_ranges[i].wm_set_id; } for (i = 0; i < clock_ranges->num_wm_mcif_sets; i++) { table->WatermarkRow[WM_SOCCLK][i].MinClock = cpu_to_le16((uint16_t) (clock_ranges->wm_mcif_clocks_ranges[i].wm_min_socclk_clk_in_khz)); table->WatermarkRow[WM_SOCCLK][i].MaxClock = cpu_to_le16((uint16_t) (clock_ranges->wm_mcif_clocks_ranges[i].wm_max_socclk_clk_in_khz)); table->WatermarkRow[WM_SOCCLK][i].MinMclk = cpu_to_le16((uint16_t) (clock_ranges->wm_mcif_clocks_ranges[i].wm_min_mem_clk_in_khz)); 
table->WatermarkRow[WM_SOCCLK][i].MaxMclk = cpu_to_le16((uint16_t) (clock_ranges->wm_mcif_clocks_ranges[i].wm_max_mem_clk_in_khz)); table->WatermarkRow[WM_SOCCLK][i].WmSetting = (uint8_t) clock_ranges->wm_mcif_clocks_ranges[i].wm_set_id; } smu->watermarks_bitmap |= WATERMARKS_EXIST; /* pass data to smu controller */ if (!(smu->watermarks_bitmap & WATERMARKS_LOADED)) { ret = smu_write_watermarks_table(smu); if (ret) { pr_err("Failed to update WMTABLE!"); return ret; } smu->watermarks_bitmap |= WATERMARKS_LOADED; } return 0; } static int renoir_get_power_profile_mode(struct smu_context *smu, char *buf) { static const char *profile_name[] = { "BOOTUP_DEFAULT", "3D_FULL_SCREEN", "POWER_SAVING", "VIDEO", "VR", "COMPUTE", "CUSTOM"}; uint32_t i, size = 0; int16_t workload_type = 0; if (!smu->pm_enabled || !buf) return -EINVAL; for (i = 0; i <= PP_SMC_POWER_PROFILE_CUSTOM; i++) { /* * Conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT * Not all profile modes are supported on arcturus. */ workload_type = smu_workload_get_type(smu, i); if (workload_type < 0) continue; size += snprintf(buf + size, PAGE_SIZE - size, "%2d %14s%s\n", i, profile_name[i], (i == smu->power_profile_mode) ? "*" : " "); } return size; } static int renoir_read_sensor(struct smu_context *smu, enum amd_pp_sensors sensor, void *data, uint32_t *size) { int ret = 0; if (!data || !size) return -EINVAL; mutex_lock(&smu->sensor_lock); switch (sensor) { case AMDGPU_PP_SENSOR_GPU_LOAD: ret = renoir_get_current_activity_percent(smu, sensor, (uint32_t *)data); *size = 4; break; case AMDGPU_PP_SENSOR_GPU_TEMP: ret = renoir_get_gpu_temperature(smu, (uint32_t *)data); *size = 4; break; default: ret = smu_v12_0_read_sensor(smu, sensor, data, size); } mutex_unlock(&smu->sensor_lock); return ret; } static bool renoir_is_dpm_running(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; /* * Util now, the pmfw hasn't exported the interface of SMU * feature mask to APU SKU so just force on all the feature * at early initial stage. 
*/ if (adev->in_suspend) return false; else return true; } static const struct pptable_funcs renoir_ppt_funcs = { .get_smu_msg_index = renoir_get_smu_msg_index, .get_smu_clk_index = renoir_get_smu_clk_index, .get_smu_table_index = renoir_get_smu_table_index, .tables_init = renoir_tables_init, .set_power_state = NULL, .get_dpm_clk_limited = renoir_get_dpm_clk_limited, .print_clk_levels = renoir_print_clk_levels, .get_current_power_state = renoir_get_current_power_state, .dpm_set_uvd_enable = renoir_dpm_set_uvd_enable, .dpm_set_jpeg_enable = renoir_dpm_set_jpeg_enable, .get_current_clk_freq_by_table = renoir_get_current_clk_freq_by_table, .force_dpm_limit_value = renoir_force_dpm_limit_value, .unforce_dpm_levels = renoir_unforce_dpm_levels, .get_workload_type = renoir_get_workload_type, .get_profiling_clk_mask = renoir_get_profiling_clk_mask, .force_clk_levels = renoir_force_clk_levels, .set_power_profile_mode = renoir_set_power_profile_mode, .set_performance_level = renoir_set_performance_level, .get_dpm_clock_table = renoir_get_dpm_clock_table, .set_watermarks_table = renoir_set_watermarks_table, .get_power_profile_mode = renoir_get_power_profile_mode, .read_sensor = renoir_read_sensor, .check_fw_status = smu_v12_0_check_fw_status, .check_fw_version = smu_v12_0_check_fw_version, .powergate_sdma = smu_v12_0_powergate_sdma, .powergate_vcn = smu_v12_0_powergate_vcn, .powergate_jpeg = smu_v12_0_powergate_jpeg, .send_smc_msg_with_param = smu_v12_0_send_msg_with_param, .set_gfx_cgpg = smu_v12_0_set_gfx_cgpg, .gfx_off_control = smu_v12_0_gfx_off_control, .init_smc_tables = smu_v12_0_init_smc_tables, .fini_smc_tables = smu_v12_0_fini_smc_tables, .populate_smc_tables = smu_v12_0_populate_smc_tables, .get_enabled_mask = smu_v12_0_get_enabled_mask, .get_current_clk_freq = smu_v12_0_get_current_clk_freq, .get_dpm_ultimate_freq = smu_v12_0_get_dpm_ultimate_freq, .mode2_reset = smu_v12_0_mode2_reset, .set_soft_freq_limited_range = smu_v12_0_set_soft_freq_limited_range, .set_driver_table_location = smu_v12_0_set_driver_table_location, .is_dpm_running = renoir_is_dpm_running, }; void renoir_set_ppt_funcs(struct smu_context *smu) { smu->ppt_funcs = &renoir_ppt_funcs; smu->smc_if_version = SMU12_DRIVER_IF_VERSION; smu->is_apu = true; }
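/*
 * Illustrative sketch (hypothetical, not part of the driver): the renoir
 * code above only fills in the pptable_funcs vtable; the SMU core is
 * expected to dispatch through smu->ppt_funcs. A wrapper along these lines
 * shows that indirection. smu_read_sensor_example() is an invented name
 * used purely for illustration.
 */
static int smu_read_sensor_example(struct smu_context *smu,
				   enum amd_pp_sensors sensor,
				   void *data, uint32_t *size)
{
	if (!smu->ppt_funcs || !smu->ppt_funcs->read_sensor)
		return -EOPNOTSUPP;	/* ASIC did not provide this hook */

	/* For renoir this reaches renoir_read_sensor() via the table above. */
	return smu->ppt_funcs->read_sensor(smu, sensor, data, size);
}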
/* $OpenBSD: kern_event.c,v 1.162 2021/02/27 13:43:16 visa Exp $ */ /*- * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org> * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD: src/sys/kern/kern_event.c,v 1.22 2001/02/23 20:32:42 jlemon Exp $ */ #include <sys/param.h> #include <sys/systm.h> #include <sys/atomic.h> #include <sys/kernel.h> #include <sys/proc.h> #include <sys/pledge.h> #include <sys/malloc.h> #include <sys/unistd.h> #include <sys/file.h> #include <sys/filedesc.h> #include <sys/fcntl.h> #include <sys/selinfo.h> #include <sys/queue.h> #include <sys/event.h> #include <sys/eventvar.h> #include <sys/ktrace.h> #include <sys/pool.h> #include <sys/protosw.h> #include <sys/socket.h> #include <sys/socketvar.h> #include <sys/stat.h> #include <sys/uio.h> #include <sys/mount.h> #include <sys/poll.h> #include <sys/syscallargs.h> #include <sys/time.h> #include <sys/timeout.h> #include <sys/wait.h> #ifdef DIAGNOSTIC #define KLIST_ASSERT_LOCKED(kl) do { \ if ((kl)->kl_ops != NULL) \ (kl)->kl_ops->klo_assertlk((kl)->kl_arg); \ else \ KERNEL_ASSERT_LOCKED(); \ } while (0) #else #define KLIST_ASSERT_LOCKED(kl) ((void)(kl)) #endif struct kqueue *kqueue_alloc(struct filedesc *); void kqueue_terminate(struct proc *p, struct kqueue *); void kqueue_init(void); void KQREF(struct kqueue *); void KQRELE(struct kqueue *); int kqueue_sleep(struct kqueue *, struct timespec *); int kqueue_read(struct file *, struct uio *, int); int kqueue_write(struct file *, struct uio *, int); int kqueue_ioctl(struct file *fp, u_long com, caddr_t data, struct proc *p); int kqueue_poll(struct file *fp, int events, struct proc *p); int kqueue_kqfilter(struct file *fp, struct knote *kn); int kqueue_stat(struct file *fp, struct stat *st, struct proc *p); int kqueue_close(struct file *fp, struct proc *p); void kqueue_wakeup(struct kqueue *kq); #ifdef KQUEUE_DEBUG void kqueue_do_check(struct kqueue *kq, const char *func, int line); #define kqueue_check(kq) kqueue_do_check((kq), __func__, __LINE__) #else #define kqueue_check(kq) do {} while (0) #endif void kqpoll_dequeue(struct proc *p); static int filter_attach(struct knote *kn); static void filter_detach(struct knote *kn); static int filter_event(struct knote *kn, long hint); static int filter_modify(struct kevent *kev, struct knote *kn); static int filter_process(struct knote *kn, struct kevent *kev); static void kqueue_expand_hash(struct kqueue *kq); static void kqueue_expand_list(struct kqueue *kq, int fd); static void kqueue_task(void *); static int klist_lock(struct klist *); static void klist_unlock(struct klist *, int); const struct fileops kqueueops = { .fo_read = kqueue_read, .fo_write = kqueue_write, .fo_ioctl = kqueue_ioctl, .fo_poll = kqueue_poll, .fo_kqfilter = kqueue_kqfilter, .fo_stat = kqueue_stat, .fo_close = kqueue_close }; void knote_attach(struct knote *kn); void knote_detach(struct knote *kn); void knote_drop(struct knote *kn, struct proc *p); void knote_enqueue(struct knote *kn); void knote_dequeue(struct knote *kn); int knote_acquire(struct knote *kn, struct klist *, int); void knote_release(struct knote *kn); void knote_activate(struct knote *kn); void knote_remove(struct proc *p, struct knlist *list, int purge); void filt_kqdetach(struct knote *kn); int filt_kqueue(struct knote *kn, long hint); int filt_procattach(struct knote *kn); void filt_procdetach(struct knote *kn); int filt_proc(struct knote *kn, long hint); int filt_fileattach(struct knote *kn); void filt_timerexpire(void *knx); int filt_timerattach(struct knote *kn); void filt_timerdetach(struct knote *kn); int filt_timer(struct knote *kn, long hint); void filt_seltruedetach(struct knote *kn); const struct filterops kqread_filtops = { .f_flags = 
FILTEROP_ISFD, .f_attach = NULL, .f_detach = filt_kqdetach, .f_event = filt_kqueue, }; const struct filterops proc_filtops = { .f_flags = 0, .f_attach = filt_procattach, .f_detach = filt_procdetach, .f_event = filt_proc, }; const struct filterops file_filtops = { .f_flags = FILTEROP_ISFD, .f_attach = filt_fileattach, .f_detach = NULL, .f_event = NULL, }; const struct filterops timer_filtops = { .f_flags = 0, .f_attach = filt_timerattach, .f_detach = filt_timerdetach, .f_event = filt_timer, }; struct pool knote_pool; struct pool kqueue_pool; int kq_ntimeouts = 0; int kq_timeoutmax = (4 * 1024); #define KN_HASH(val, mask) (((val) ^ (val >> 8)) & (mask)) /* * Table for for all system-defined filters. */ const struct filterops *const sysfilt_ops[] = { &file_filtops, /* EVFILT_READ */ &file_filtops, /* EVFILT_WRITE */ NULL, /*&aio_filtops,*/ /* EVFILT_AIO */ &file_filtops, /* EVFILT_VNODE */ &proc_filtops, /* EVFILT_PROC */ &sig_filtops, /* EVFILT_SIGNAL */ &timer_filtops, /* EVFILT_TIMER */ &file_filtops, /* EVFILT_DEVICE */ &file_filtops, /* EVFILT_EXCEPT */ }; void KQREF(struct kqueue *kq) { atomic_inc_int(&kq->kq_refs); } void KQRELE(struct kqueue *kq) { struct filedesc *fdp; if (atomic_dec_int_nv(&kq->kq_refs) > 0) return; fdp = kq->kq_fdp; if (rw_status(&fdp->fd_lock) == RW_WRITE) { LIST_REMOVE(kq, kq_next); } else { fdplock(fdp); LIST_REMOVE(kq, kq_next); fdpunlock(fdp); } KASSERT(TAILQ_EMPTY(&kq->kq_head)); free(kq->kq_knlist, M_KEVENT, kq->kq_knlistsize * sizeof(struct knlist)); hashfree(kq->kq_knhash, KN_HASHSIZE, M_KEVENT); pool_put(&kqueue_pool, kq); } void kqueue_init(void) { pool_init(&kqueue_pool, sizeof(struct kqueue), 0, IPL_MPFLOOR, PR_WAITOK, "kqueuepl", NULL); pool_init(&knote_pool, sizeof(struct knote), 0, IPL_MPFLOOR, PR_WAITOK, "knotepl", NULL); } int filt_fileattach(struct knote *kn) { struct file *fp = kn->kn_fp; return fp->f_ops->fo_kqfilter(fp, kn); } int kqueue_kqfilter(struct file *fp, struct knote *kn) { struct kqueue *kq = kn->kn_fp->f_data; if (kn->kn_filter != EVFILT_READ) return (EINVAL); kn->kn_fop = &kqread_filtops; klist_insert_locked(&kq->kq_sel.si_note, kn); return (0); } void filt_kqdetach(struct knote *kn) { struct kqueue *kq = kn->kn_fp->f_data; klist_remove_locked(&kq->kq_sel.si_note, kn); } int filt_kqueue(struct knote *kn, long hint) { struct kqueue *kq = kn->kn_fp->f_data; kn->kn_data = kq->kq_count; return (kn->kn_data > 0); } int filt_procattach(struct knote *kn) { struct process *pr; int s; if ((curproc->p_p->ps_flags & PS_PLEDGE) && (curproc->p_p->ps_pledge & PLEDGE_PROC) == 0) return pledge_fail(curproc, EPERM, PLEDGE_PROC); if (kn->kn_id > PID_MAX) return ESRCH; pr = prfind(kn->kn_id); if (pr == NULL) return (ESRCH); /* exiting processes can't be specified */ if (pr->ps_flags & PS_EXITING) return (ESRCH); kn->kn_ptr.p_process = pr; kn->kn_flags |= EV_CLEAR; /* automatically set */ /* * internal flag indicating registration done by kernel */ if (kn->kn_flags & EV_FLAG1) { kn->kn_data = kn->kn_sdata; /* ppid */ kn->kn_fflags = NOTE_CHILD; kn->kn_flags &= ~EV_FLAG1; } s = splhigh(); klist_insert_locked(&pr->ps_klist, kn); splx(s); return (0); } /* * The knote may be attached to a different process, which may exit, * leaving nothing for the knote to be attached to. So when the process * exits, the knote is marked as DETACHED and also flagged as ONESHOT so * it will be deleted when read out. 
However, as part of the knote deletion, * this routine is called, so a check is needed to avoid actually performing * a detach, because the original process does not exist any more. */ void filt_procdetach(struct knote *kn) { struct process *pr = kn->kn_ptr.p_process; int s; if (kn->kn_status & KN_DETACHED) return; s = splhigh(); klist_remove_locked(&pr->ps_klist, kn); splx(s); } int filt_proc(struct knote *kn, long hint) { u_int event; /* * mask off extra data */ event = (u_int)hint & NOTE_PCTRLMASK; /* * if the user is interested in this event, record it. */ if (kn->kn_sfflags & event) kn->kn_fflags |= event; /* * process is gone, so flag the event as finished and remove it * from the process's klist */ if (event == NOTE_EXIT) { struct process *pr = kn->kn_ptr.p_process; int s; s = splhigh(); kn->kn_status |= KN_DETACHED; kn->kn_flags |= (EV_EOF | EV_ONESHOT); kn->kn_data = W_EXITCODE(pr->ps_xexit, pr->ps_xsig); klist_remove_locked(&pr->ps_klist, kn); splx(s); return (1); } /* * process forked, and user wants to track the new process, * so attach a new knote to it, and immediately report an * event with the parent's pid. */ if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) { struct kevent kev; int error; /* * register knote with new process. */ memset(&kev, 0, sizeof(kev)); kev.ident = hint & NOTE_PDATAMASK; /* pid */ kev.filter = kn->kn_filter; kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1; kev.fflags = kn->kn_sfflags; kev.data = kn->kn_id; /* parent */ kev.udata = kn->kn_udata; /* preserve udata */ error = kqueue_register(kn->kn_kq, &kev, NULL); if (error) kn->kn_fflags |= NOTE_TRACKERR; } return (kn->kn_fflags != 0); } static void filt_timer_timeout_add(struct knote *kn) { struct timeval tv; struct timeout *to = kn->kn_hook; int tticks; tv.tv_sec = kn->kn_sdata / 1000; tv.tv_usec = (kn->kn_sdata % 1000) * 1000; tticks = tvtohz(&tv); /* Remove extra tick from tvtohz() if timeout has fired before. */ if (timeout_triggered(to)) tticks--; timeout_add(to, (tticks > 0) ? tticks : 1); } void filt_timerexpire(void *knx) { struct knote *kn = knx; kn->kn_data++; knote_activate(kn); if ((kn->kn_flags & EV_ONESHOT) == 0) filt_timer_timeout_add(kn); } /* * data contains amount of time to sleep, in milliseconds */ int filt_timerattach(struct knote *kn) { struct timeout *to; if (kq_ntimeouts > kq_timeoutmax) return (ENOMEM); kq_ntimeouts++; kn->kn_flags |= EV_CLEAR; /* automatically set */ to = malloc(sizeof(*to), M_KEVENT, M_WAITOK); timeout_set(to, filt_timerexpire, kn); kn->kn_hook = to; filt_timer_timeout_add(kn); return (0); } void filt_timerdetach(struct knote *kn) { struct timeout *to; to = (struct timeout *)kn->kn_hook; timeout_del(to); free(to, M_KEVENT, sizeof(*to)); kq_ntimeouts--; } int filt_timer(struct knote *kn, long hint) { return (kn->kn_data != 0); } /* * filt_seltrue: * * This filter "event" routine simulates seltrue(). */ int filt_seltrue(struct knote *kn, long hint) { /* * We don't know how much data can be read/written, * but we know that it *can* be. This is about as * good as select/poll does as well. */ kn->kn_data = 0; return (1); } int filt_seltruemodify(struct kevent *kev, struct knote *kn) { knote_modify(kev, kn); return (1); } int filt_seltrueprocess(struct knote *kn, struct kevent *kev) { knote_submit(kn, kev); return (1); } /* * This provides full kqfilter entry for device switch tables, which * has same effect as filter using filt_seltrue() as filter method. 
*/ void filt_seltruedetach(struct knote *kn) { /* Nothing to do */ } const struct filterops seltrue_filtops = { .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, .f_attach = NULL, .f_detach = filt_seltruedetach, .f_event = filt_seltrue, .f_modify = filt_seltruemodify, .f_process = filt_seltrueprocess, }; int seltrue_kqfilter(dev_t dev, struct knote *kn) { switch (kn->kn_filter) { case EVFILT_READ: case EVFILT_WRITE: kn->kn_fop = &seltrue_filtops; break; default: return (EINVAL); } /* Nothing more to do */ return (0); } static int filt_dead(struct knote *kn, long hint) { kn->kn_flags |= (EV_EOF | EV_ONESHOT); if (kn->kn_flags & __EV_POLL) kn->kn_flags |= __EV_HUP; kn->kn_data = 0; return (1); } static void filt_deaddetach(struct knote *kn) { /* Nothing to do */ } const struct filterops dead_filtops = { .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, .f_attach = NULL, .f_detach = filt_deaddetach, .f_event = filt_dead, .f_modify = filt_seltruemodify, .f_process = filt_seltrueprocess, }; static int filt_badfd(struct knote *kn, long hint) { kn->kn_flags |= (EV_ERROR | EV_ONESHOT); kn->kn_data = EBADF; return (1); } /* For use with kqpoll. */ const struct filterops badfd_filtops = { .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, .f_attach = NULL, .f_detach = filt_deaddetach, .f_event = filt_badfd, .f_modify = filt_seltruemodify, .f_process = filt_seltrueprocess, }; static int filter_attach(struct knote *kn) { int error; if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { error = kn->kn_fop->f_attach(kn); } else { KERNEL_LOCK(); error = kn->kn_fop->f_attach(kn); KERNEL_UNLOCK(); } return (error); } static void filter_detach(struct knote *kn) { if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { kn->kn_fop->f_detach(kn); } else { KERNEL_LOCK(); kn->kn_fop->f_detach(kn); KERNEL_UNLOCK(); } } static int filter_event(struct knote *kn, long hint) { if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0) KERNEL_ASSERT_LOCKED(); return (kn->kn_fop->f_event(kn, hint)); } static int filter_modify(struct kevent *kev, struct knote *kn) { int active, s; if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { active = kn->kn_fop->f_modify(kev, kn); } else { KERNEL_LOCK(); if (kn->kn_fop->f_modify != NULL) { active = kn->kn_fop->f_modify(kev, kn); } else { /* Emulate f_modify using f_event. */ s = splhigh(); knote_modify(kev, kn); active = kn->kn_fop->f_event(kn, 0); splx(s); } KERNEL_UNLOCK(); } return (active); } static int filter_process(struct knote *kn, struct kevent *kev) { int active, s; if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { active = kn->kn_fop->f_process(kn, kev); } else { KERNEL_LOCK(); if (kn->kn_fop->f_process != NULL) { active = kn->kn_fop->f_process(kn, kev); } else { /* Emulate f_process using f_event. */ s = splhigh(); /* * If called from kqueue_scan(), skip f_event * when EV_ONESHOT is set, to preserve old behaviour. */ if (kev != NULL && (kn->kn_flags & EV_ONESHOT)) active = 1; else active = kn->kn_fop->f_event(kn, 0); if (active) knote_submit(kn, kev); splx(s); } KERNEL_UNLOCK(); } return (active); } void kqpoll_init(void) { struct proc *p = curproc; struct filedesc *fdp; if (p->p_kq != NULL) { /* * Discard any knotes that have been enqueued after * previous scan. * This prevents accumulation of enqueued badfd knotes * in case scan does not make progress for some reason. 
*/ kqpoll_dequeue(p); return; } p->p_kq = kqueue_alloc(p->p_fd); p->p_kq_serial = arc4random(); fdp = p->p_fd; fdplock(fdp); LIST_INSERT_HEAD(&fdp->fd_kqlist, p->p_kq, kq_next); fdpunlock(fdp); } void kqpoll_exit(void) { struct proc *p = curproc; if (p->p_kq == NULL) return; kqueue_purge(p, p->p_kq); /* Clear any detached knotes that remain in the queue. */ kqpoll_dequeue(p); kqueue_terminate(p, p->p_kq); KASSERT(p->p_kq->kq_refs == 1); KQRELE(p->p_kq); p->p_kq = NULL; } void kqpoll_dequeue(struct proc *p) { struct knote *kn; struct kqueue *kq = p->p_kq; int s; s = splhigh(); while ((kn = TAILQ_FIRST(&kq->kq_head)) != NULL) { /* This kqueue should not be scanned by other threads. */ KASSERT(kn->kn_filter != EVFILT_MARKER); if (!knote_acquire(kn, NULL, 0)) continue; kqueue_check(kq); TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); kn->kn_status &= ~KN_QUEUED; kq->kq_count--; splx(s); kn->kn_fop->f_detach(kn); knote_drop(kn, p); s = splhigh(); kqueue_check(kq); } splx(s); } struct kqueue * kqueue_alloc(struct filedesc *fdp) { struct kqueue *kq; kq = pool_get(&kqueue_pool, PR_WAITOK | PR_ZERO); kq->kq_refs = 1; kq->kq_fdp = fdp; TAILQ_INIT(&kq->kq_head); task_set(&kq->kq_task, kqueue_task, kq); return (kq); } int sys_kqueue(struct proc *p, void *v, register_t *retval) { struct filedesc *fdp = p->p_fd; struct kqueue *kq; struct file *fp; int fd, error; kq = kqueue_alloc(fdp); fdplock(fdp); error = falloc(p, &fp, &fd); if (error) goto out; fp->f_flag = FREAD | FWRITE; fp->f_type = DTYPE_KQUEUE; fp->f_ops = &kqueueops; fp->f_data = kq; *retval = fd; LIST_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_next); kq = NULL; fdinsert(fdp, fd, 0, fp); FRELE(fp, p); out: fdpunlock(fdp); if (kq != NULL) pool_put(&kqueue_pool, kq); return (error); } int sys_kevent(struct proc *p, void *v, register_t *retval) { struct kqueue_scan_state scan; struct filedesc* fdp = p->p_fd; struct sys_kevent_args /* { syscallarg(int) fd; syscallarg(const struct kevent *) changelist; syscallarg(int) nchanges; syscallarg(struct kevent *) eventlist; syscallarg(int) nevents; syscallarg(const struct timespec *) timeout; } */ *uap = v; struct kevent *kevp; struct kqueue *kq; struct file *fp; struct timespec ts; struct timespec *tsp = NULL; int i, n, nerrors, error; int ready, total; struct kevent kev[KQ_NEVENTS]; if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL) return (EBADF); if (fp->f_type != DTYPE_KQUEUE) { error = EBADF; goto done; } if (SCARG(uap, timeout) != NULL) { error = copyin(SCARG(uap, timeout), &ts, sizeof(ts)); if (error) goto done; #ifdef KTRACE if (KTRPOINT(p, KTR_STRUCT)) ktrreltimespec(p, &ts); #endif if (ts.tv_sec < 0 || !timespecisvalid(&ts)) { error = EINVAL; goto done; } tsp = &ts; } kq = fp->f_data; nerrors = 0; while ((n = SCARG(uap, nchanges)) > 0) { if (n > nitems(kev)) n = nitems(kev); error = copyin(SCARG(uap, changelist), kev, n * sizeof(struct kevent)); if (error) goto done; #ifdef KTRACE if (KTRPOINT(p, KTR_STRUCT)) ktrevent(p, kev, n); #endif for (i = 0; i < n; i++) { kevp = &kev[i]; kevp->flags &= ~EV_SYSFLAGS; error = kqueue_register(kq, kevp, p); if (error || (kevp->flags & EV_RECEIPT)) { if (SCARG(uap, nevents) != 0) { kevp->flags = EV_ERROR; kevp->data = error; copyout(kevp, SCARG(uap, eventlist), sizeof(*kevp)); SCARG(uap, eventlist)++; SCARG(uap, nevents)--; nerrors++; } else { goto done; } } } SCARG(uap, nchanges) -= n; SCARG(uap, changelist) += n; } if (nerrors) { *retval = nerrors; error = 0; goto done; } kqueue_scan_setup(&scan, kq); FRELE(fp, p); /* * Collect as many events as we can. 
The timeout on successive * loops is disabled (kqueue_scan() becomes non-blocking). */ total = 0; error = 0; while ((n = SCARG(uap, nevents) - total) > 0) { if (n > nitems(kev)) n = nitems(kev); ready = kqueue_scan(&scan, n, kev, tsp, p, &error); if (ready == 0) break; error = copyout(kev, SCARG(uap, eventlist) + total, sizeof(struct kevent) * ready); #ifdef KTRACE if (KTRPOINT(p, KTR_STRUCT)) ktrevent(p, kev, ready); #endif total += ready; if (error || ready < n) break; } kqueue_scan_finish(&scan); *retval = total; return (error); done: FRELE(fp, p); return (error); } #ifdef KQUEUE_DEBUG void kqueue_do_check(struct kqueue *kq, const char *func, int line) { struct knote *kn; int count = 0, nmarker = 0; KERNEL_ASSERT_LOCKED(); splassert(IPL_HIGH); TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe) { if (kn->kn_filter == EVFILT_MARKER) { if ((kn->kn_status & KN_QUEUED) != 0) panic("%s:%d: kq=%p kn=%p marker QUEUED", func, line, kq, kn); nmarker++; } else { if ((kn->kn_status & KN_ACTIVE) == 0) panic("%s:%d: kq=%p kn=%p knote !ACTIVE", func, line, kq, kn); if ((kn->kn_status & KN_QUEUED) == 0) panic("%s:%d: kq=%p kn=%p knote !QUEUED", func, line, kq, kn); if (kn->kn_kq != kq) panic("%s:%d: kq=%p kn=%p kn_kq=%p != kq", func, line, kq, kn, kn->kn_kq); count++; if (count > kq->kq_count) goto bad; } } if (count != kq->kq_count) { bad: panic("%s:%d: kq=%p kq_count=%d count=%d nmarker=%d", func, line, kq, kq->kq_count, count, nmarker); } } #endif int kqueue_register(struct kqueue *kq, struct kevent *kev, struct proc *p) { struct filedesc *fdp = kq->kq_fdp; const struct filterops *fops = NULL; struct file *fp = NULL; struct knote *kn = NULL, *newkn = NULL; struct knlist *list = NULL; int s, error = 0; if (kev->filter < 0) { if (kev->filter + EVFILT_SYSCOUNT < 0) return (EINVAL); fops = sysfilt_ops[~kev->filter]; /* to 0-base index */ } if (fops == NULL) { /* * XXX * filter attach routine is responsible for ensuring that * the identifier can be attached to it. */ return (EINVAL); } if (fops->f_flags & FILTEROP_ISFD) { /* validate descriptor */ if (kev->ident > INT_MAX) return (EBADF); } if (kev->flags & EV_ADD) newkn = pool_get(&knote_pool, PR_WAITOK | PR_ZERO); again: if (fops->f_flags & FILTEROP_ISFD) { if ((fp = fd_getfile(fdp, kev->ident)) == NULL) { error = EBADF; goto done; } if (kev->flags & EV_ADD) kqueue_expand_list(kq, kev->ident); if (kev->ident < kq->kq_knlistsize) list = &kq->kq_knlist[kev->ident]; } else { if (kev->flags & EV_ADD) kqueue_expand_hash(kq); if (kq->kq_knhashmask != 0) { list = &kq->kq_knhash[ KN_HASH((u_long)kev->ident, kq->kq_knhashmask)]; } } if (list != NULL) { SLIST_FOREACH(kn, list, kn_link) { if (kev->filter == kn->kn_filter && kev->ident == kn->kn_id) { s = splhigh(); if (!knote_acquire(kn, NULL, 0)) { splx(s); if (fp != NULL) { FRELE(fp, p); fp = NULL; } goto again; } splx(s); break; } } } KASSERT(kn == NULL || (kn->kn_status & KN_PROCESSING) != 0); if (kn == NULL && ((kev->flags & EV_ADD) == 0)) { error = ENOENT; goto done; } /* * kn now contains the matching knote, or NULL if no match. * If adding a new knote, sleeping is not allowed until the knote * has been inserted. */ if (kev->flags & EV_ADD) { if (kn == NULL) { kn = newkn; newkn = NULL; kn->kn_status = KN_PROCESSING; kn->kn_fp = fp; kn->kn_kq = kq; kn->kn_fop = fops; /* * apply reference count to knote structure, and * do not release it at the end of this routine. 
*/ fp = NULL; kn->kn_sfflags = kev->fflags; kn->kn_sdata = kev->data; kev->fflags = 0; kev->data = 0; kn->kn_kevent = *kev; knote_attach(kn); error = filter_attach(kn); if (error != 0) { knote_drop(kn, p); goto done; } /* * If this is a file descriptor filter, check if * fd was closed while the knote was being added. * knote_fdclose() has missed kn if the function * ran before kn appeared in kq_knlist. */ if ((fops->f_flags & FILTEROP_ISFD) && fd_checkclosed(fdp, kev->ident, kn->kn_fp)) { /* * Drop the knote silently without error * because another thread might already have * seen it. This corresponds to the insert * happening in full before the close. */ filter_detach(kn); knote_drop(kn, p); goto done; } /* Check if there is a pending event. */ if (filter_process(kn, NULL)) knote_activate(kn); } else { /* * The user may change some filter values after the * initial EV_ADD, but doing so will not reset any * filters which have already been triggered. */ if (filter_modify(kev, kn)) knote_activate(kn); if (kev->flags & EV_ERROR) { error = kev->data; goto release; } } } else if (kev->flags & EV_DELETE) { filter_detach(kn); knote_drop(kn, p); goto done; } if ((kev->flags & EV_DISABLE) && ((kn->kn_status & KN_DISABLED) == 0)) { s = splhigh(); kn->kn_status |= KN_DISABLED; splx(s); } if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) { s = splhigh(); kn->kn_status &= ~KN_DISABLED; splx(s); /* Check if there is a pending event. */ if (filter_process(kn, NULL)) knote_activate(kn); } release: s = splhigh(); knote_release(kn); splx(s); done: if (fp != NULL) FRELE(fp, p); if (newkn != NULL) pool_put(&knote_pool, newkn); return (error); } int kqueue_sleep(struct kqueue *kq, struct timespec *tsp) { struct timespec elapsed, start, stop; uint64_t nsecs; int error; splassert(IPL_HIGH); if (tsp != NULL) { getnanouptime(&start); nsecs = MIN(TIMESPEC_TO_NSEC(tsp), MAXTSLP); } else nsecs = INFSLP; error = tsleep_nsec(kq, PSOCK | PCATCH, "kqread", nsecs); if (tsp != NULL) { getnanouptime(&stop); timespecsub(&stop, &start, &elapsed); timespecsub(tsp, &elapsed, tsp); if (tsp->tv_sec < 0) timespecclear(tsp); } return (error); } /* * Scan the kqueue, blocking if necessary until the target time is reached. * If tsp is NULL we block indefinitely. If tsp->ts_secs/nsecs are both * 0 we do not block at all. */ int kqueue_scan(struct kqueue_scan_state *scan, int maxevents, struct kevent *kevp, struct timespec *tsp, struct proc *p, int *errorp) { struct kqueue *kq = scan->kqs_kq; struct knote *kn; int s, error = 0, nkev = 0; if (maxevents == 0) goto done; retry: KASSERT(nkev == 0); error = 0; if (kq->kq_state & KQ_DYING) { error = EBADF; goto done; } s = splhigh(); if (kq->kq_count == 0) { /* * Successive loops are only necessary if there are more * ready events to gather, so they don't need to block. */ if ((tsp != NULL && !timespecisset(tsp)) || scan->kqs_nevent != 0) { splx(s); error = 0; goto done; } kq->kq_state |= KQ_SLEEP; error = kqueue_sleep(kq, tsp); splx(s); if (error == 0 || error == EWOULDBLOCK) goto retry; /* don't restart after signals... */ if (error == ERESTART) error = EINTR; goto done; } /* * Put the end marker in the queue to limit the scan to the events * that are currently active. This prevents events from being * recollected if they reactivate during scan. * * If a partial scan has been performed already but no events have * been collected, reposition the end marker to make any new events * reachable. 
*/ if (!scan->kqs_queued) { TAILQ_INSERT_TAIL(&kq->kq_head, &scan->kqs_end, kn_tqe); scan->kqs_queued = 1; } else if (scan->kqs_nevent == 0) { TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, kn_tqe); TAILQ_INSERT_TAIL(&kq->kq_head, &scan->kqs_end, kn_tqe); } TAILQ_INSERT_HEAD(&kq->kq_head, &scan->kqs_start, kn_tqe); while (nkev < maxevents) { kn = TAILQ_NEXT(&scan->kqs_start, kn_tqe); if (kn->kn_filter == EVFILT_MARKER) { if (kn == &scan->kqs_end) break; /* Move start marker past another thread's marker. */ TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe); TAILQ_INSERT_AFTER(&kq->kq_head, kn, &scan->kqs_start, kn_tqe); continue; } if (!knote_acquire(kn, NULL, 0)) continue; kqueue_check(kq); TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); kn->kn_status &= ~KN_QUEUED; kq->kq_count--; kqueue_check(kq); if (kn->kn_status & KN_DISABLED) { knote_release(kn); continue; } splx(s); memset(kevp, 0, sizeof(*kevp)); if (filter_process(kn, kevp) == 0) { s = splhigh(); if ((kn->kn_status & KN_QUEUED) == 0) kn->kn_status &= ~KN_ACTIVE; knote_release(kn); kqueue_check(kq); continue; } /* * Post-event action on the note */ if (kevp->flags & EV_ONESHOT) { filter_detach(kn); knote_drop(kn, p); s = splhigh(); } else if (kevp->flags & (EV_CLEAR | EV_DISPATCH)) { s = splhigh(); if (kevp->flags & EV_DISPATCH) kn->kn_status |= KN_DISABLED; if ((kn->kn_status & KN_QUEUED) == 0) kn->kn_status &= ~KN_ACTIVE; KASSERT(kn->kn_status & KN_ATTACHED); knote_release(kn); } else { s = splhigh(); if ((kn->kn_status & KN_QUEUED) == 0) { kqueue_check(kq); kq->kq_count++; kn->kn_status |= KN_QUEUED; TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); } KASSERT(kn->kn_status & KN_ATTACHED); knote_release(kn); } kqueue_check(kq); kevp++; nkev++; scan->kqs_nevent++; } TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe); splx(s); if (scan->kqs_nevent == 0) goto retry; done: *errorp = error; return (nkev); } void kqueue_scan_setup(struct kqueue_scan_state *scan, struct kqueue *kq) { memset(scan, 0, sizeof(*scan)); KQREF(kq); scan->kqs_kq = kq; scan->kqs_start.kn_filter = EVFILT_MARKER; scan->kqs_start.kn_status = KN_PROCESSING; scan->kqs_end.kn_filter = EVFILT_MARKER; scan->kqs_end.kn_status = KN_PROCESSING; } void kqueue_scan_finish(struct kqueue_scan_state *scan) { struct kqueue *kq = scan->kqs_kq; int s; KASSERT(scan->kqs_start.kn_filter == EVFILT_MARKER); KASSERT(scan->kqs_start.kn_status == KN_PROCESSING); KASSERT(scan->kqs_end.kn_filter == EVFILT_MARKER); KASSERT(scan->kqs_end.kn_status == KN_PROCESSING); if (scan->kqs_queued) { scan->kqs_queued = 0; s = splhigh(); TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, kn_tqe); splx(s); } KQRELE(kq); } /* * XXX * This could be expanded to call kqueue_scan, if desired. 
*/ int kqueue_read(struct file *fp, struct uio *uio, int fflags) { return (ENXIO); } int kqueue_write(struct file *fp, struct uio *uio, int fflags) { return (ENXIO); } int kqueue_ioctl(struct file *fp, u_long com, caddr_t data, struct proc *p) { return (ENOTTY); } int kqueue_poll(struct file *fp, int events, struct proc *p) { struct kqueue *kq = (struct kqueue *)fp->f_data; int revents = 0; int s = splhigh(); if (events & (POLLIN | POLLRDNORM)) { if (kq->kq_count) { revents |= events & (POLLIN | POLLRDNORM); } else { selrecord(p, &kq->kq_sel); kq->kq_state |= KQ_SEL; } } splx(s); return (revents); } int kqueue_stat(struct file *fp, struct stat *st, struct proc *p) { struct kqueue *kq = fp->f_data; memset(st, 0, sizeof(*st)); st->st_size = kq->kq_count; st->st_blksize = sizeof(struct kevent); st->st_mode = S_IFIFO; return (0); } void kqueue_purge(struct proc *p, struct kqueue *kq) { int i; KERNEL_ASSERT_LOCKED(); for (i = 0; i < kq->kq_knlistsize; i++) knote_remove(p, &kq->kq_knlist[i], 1); if (kq->kq_knhashmask != 0) { for (i = 0; i < kq->kq_knhashmask + 1; i++) knote_remove(p, &kq->kq_knhash[i], 1); } } void kqueue_terminate(struct proc *p, struct kqueue *kq) { struct knote *kn; /* * Any remaining entries should be scan markers. * They are removed when the ongoing scans finish. */ KASSERT(kq->kq_count == 0); TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe) KASSERT(kn->kn_filter == EVFILT_MARKER); kq->kq_state |= KQ_DYING; kqueue_wakeup(kq); KASSERT(klist_empty(&kq->kq_sel.si_note)); task_del(systq, &kq->kq_task); } int kqueue_close(struct file *fp, struct proc *p) { struct kqueue *kq = fp->f_data; KERNEL_LOCK(); kqueue_purge(p, kq); kqueue_terminate(p, kq); fp->f_data = NULL; KQRELE(kq); KERNEL_UNLOCK(); return (0); } static void kqueue_task(void *arg) { struct kqueue *kq = arg; if (kq->kq_state & KQ_SEL) { kq->kq_state &= ~KQ_SEL; selwakeup(&kq->kq_sel); } else { KNOTE(&kq->kq_sel.si_note, 0); } KQRELE(kq); } void kqueue_wakeup(struct kqueue *kq) { if (kq->kq_state & KQ_SLEEP) { kq->kq_state &= ~KQ_SLEEP; wakeup(kq); } if ((kq->kq_state & KQ_SEL) || !klist_empty(&kq->kq_sel.si_note)) { /* Defer activation to avoid recursion. */ KQREF(kq); if (!task_add(systq, &kq->kq_task)) KQRELE(kq); } } static void kqueue_expand_hash(struct kqueue *kq) { struct knlist *hash; u_long hashmask; if (kq->kq_knhashmask == 0) { hash = hashinit(KN_HASHSIZE, M_KEVENT, M_WAITOK, &hashmask); if (kq->kq_knhashmask == 0) { kq->kq_knhash = hash; kq->kq_knhashmask = hashmask; } else { /* Another thread has allocated the hash. */ hashfree(hash, KN_HASHSIZE, M_KEVENT); } } } static void kqueue_expand_list(struct kqueue *kq, int fd) { struct knlist *list; int size; if (kq->kq_knlistsize <= fd) { size = kq->kq_knlistsize; while (size <= fd) size += KQEXTENT; list = mallocarray(size, sizeof(*list), M_KEVENT, M_WAITOK); if (kq->kq_knlistsize <= fd) { memcpy(list, kq->kq_knlist, kq->kq_knlistsize * sizeof(*list)); memset(&list[kq->kq_knlistsize], 0, (size - kq->kq_knlistsize) * sizeof(*list)); free(kq->kq_knlist, M_KEVENT, kq->kq_knlistsize * sizeof(*list)); kq->kq_knlist = list; kq->kq_knlistsize = size; } else { /* Another thread has expanded the list. */ free(list, M_KEVENT, size * sizeof(*list)); } } } /* * Acquire a knote, return non-zero on success, 0 on failure. * * If we cannot acquire the knote we sleep and return 0. The knote * may be stale on return in this case and the caller must restart * whatever loop they are in. 
* * If we are about to sleep and klist is non-NULL, the list is unlocked * before sleep and remains unlocked on return. */ int knote_acquire(struct knote *kn, struct klist *klist, int ls) { splassert(IPL_HIGH); KASSERT(kn->kn_filter != EVFILT_MARKER); if (kn->kn_status & KN_PROCESSING) { kn->kn_status |= KN_WAITING; if (klist != NULL) klist_unlock(klist, ls); tsleep_nsec(kn, 0, "kqepts", SEC_TO_NSEC(1)); /* knote may be stale now */ return (0); } kn->kn_status |= KN_PROCESSING; return (1); } /* * Release an acquired knote, clearing KN_PROCESSING. */ void knote_release(struct knote *kn) { splassert(IPL_HIGH); KASSERT(kn->kn_filter != EVFILT_MARKER); KASSERT(kn->kn_status & KN_PROCESSING); if (kn->kn_status & KN_WAITING) { kn->kn_status &= ~KN_WAITING; wakeup(kn); } kn->kn_status &= ~KN_PROCESSING; /* kn should not be accessed anymore */ } /* * activate one knote. */ void knote_activate(struct knote *kn) { int s; s = splhigh(); kn->kn_status |= KN_ACTIVE; if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0) knote_enqueue(kn); splx(s); } /* * walk down a list of knotes, activating them if their event has triggered. */ void knote(struct klist *list, long hint) { struct knote *kn, *kn0; KLIST_ASSERT_LOCKED(list); SLIST_FOREACH_SAFE(kn, &list->kl_list, kn_selnext, kn0) if (filter_event(kn, hint)) knote_activate(kn); } /* * remove all knotes from a specified knlist */ void knote_remove(struct proc *p, struct knlist *list, int purge) { struct knote *kn; int s; while ((kn = SLIST_FIRST(list)) != NULL) { s = splhigh(); if (!knote_acquire(kn, NULL, 0)) { splx(s); continue; } splx(s); filter_detach(kn); /* * Notify poll(2) and select(2) when a monitored * file descriptor is closed. * * This reuses the original knote for delivering the * notification so as to avoid allocating memory. * The knote will be reachable only through the queue * of active knotes and is freed either by kqueue_scan() * or kqpoll_dequeue(). */ if (!purge && (kn->kn_flags & __EV_POLL) != 0) { KASSERT(kn->kn_fop->f_flags & FILTEROP_ISFD); knote_detach(kn); FRELE(kn->kn_fp, p); kn->kn_fp = NULL; kn->kn_fop = &badfd_filtops; filter_event(kn, 0); knote_activate(kn); s = splhigh(); knote_release(kn); splx(s); continue; } knote_drop(kn, p); } } /* * remove all knotes referencing a specified fd */ void knote_fdclose(struct proc *p, int fd) { struct filedesc *fdp = p->p_p->ps_fd; struct kqueue *kq; struct knlist *list; /* * fdplock can be ignored if the file descriptor table is being freed * because no other thread can access the fdp. 
*/ if (fdp->fd_refcnt != 0) fdpassertlocked(fdp); if (LIST_EMPTY(&fdp->fd_kqlist)) return; KERNEL_LOCK(); LIST_FOREACH(kq, &fdp->fd_kqlist, kq_next) { if (fd >= kq->kq_knlistsize) continue; list = &kq->kq_knlist[fd]; knote_remove(p, list, 0); } KERNEL_UNLOCK(); } /* * handle a process exiting, including the triggering of NOTE_EXIT notes * XXX this could be more efficient, doing a single pass down the klist */ void knote_processexit(struct proc *p) { struct process *pr = p->p_p; KASSERT(p == curproc); KNOTE(&pr->ps_klist, NOTE_EXIT); /* remove other knotes hanging off the process */ klist_invalidate(&pr->ps_klist); } void knote_attach(struct knote *kn) { struct kqueue *kq = kn->kn_kq; struct knlist *list; int s; KASSERT(kn->kn_status & KN_PROCESSING); KASSERT((kn->kn_status & KN_ATTACHED) == 0); s = splhigh(); kn->kn_status |= KN_ATTACHED; splx(s); if (kn->kn_fop->f_flags & FILTEROP_ISFD) { KASSERT(kq->kq_knlistsize > kn->kn_id); list = &kq->kq_knlist[kn->kn_id]; } else { KASSERT(kq->kq_knhashmask != 0); list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)]; } SLIST_INSERT_HEAD(list, kn, kn_link); } void knote_detach(struct knote *kn) { struct kqueue *kq = kn->kn_kq; struct knlist *list; int s; KASSERT(kn->kn_status & KN_PROCESSING); if ((kn->kn_status & KN_ATTACHED) == 0) return; if (kn->kn_fop->f_flags & FILTEROP_ISFD) list = &kq->kq_knlist[kn->kn_id]; else list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)]; SLIST_REMOVE(list, kn, knote, kn_link); s = splhigh(); kn->kn_status &= ~KN_ATTACHED; splx(s); } /* * should be called at spl == 0, since we don't want to hold spl * while calling FRELE and pool_put. */ void knote_drop(struct knote *kn, struct proc *p) { int s; KASSERT(kn->kn_filter != EVFILT_MARKER); knote_detach(kn); s = splhigh(); if (kn->kn_status & KN_QUEUED) knote_dequeue(kn); if (kn->kn_status & KN_WAITING) { kn->kn_status &= ~KN_WAITING; wakeup(kn); } splx(s); if ((kn->kn_fop->f_flags & FILTEROP_ISFD) && kn->kn_fp != NULL) FRELE(kn->kn_fp, p); pool_put(&knote_pool, kn); } void knote_enqueue(struct knote *kn) { struct kqueue *kq = kn->kn_kq; splassert(IPL_HIGH); KASSERT(kn->kn_filter != EVFILT_MARKER); KASSERT((kn->kn_status & KN_QUEUED) == 0); kqueue_check(kq); TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); kn->kn_status |= KN_QUEUED; kq->kq_count++; kqueue_check(kq); kqueue_wakeup(kq); } void knote_dequeue(struct knote *kn) { struct kqueue *kq = kn->kn_kq; splassert(IPL_HIGH); KASSERT(kn->kn_filter != EVFILT_MARKER); KASSERT(kn->kn_status & KN_QUEUED); kqueue_check(kq); TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); kn->kn_status &= ~KN_QUEUED; kq->kq_count--; kqueue_check(kq); } /* * Modify the knote's parameters. * * The knote's object lock must be held. */ void knote_modify(const struct kevent *kev, struct knote *kn) { kn->kn_sfflags = kev->fflags; kn->kn_sdata = kev->data; kn->kn_udata = kev->udata; } /* * Submit the knote's event for delivery. * * The knote's object lock must be held. 
*/ void knote_submit(struct knote *kn, struct kevent *kev) { if (kev != NULL) { *kev = kn->kn_kevent; if (kn->kn_flags & EV_CLEAR) { kn->kn_fflags = 0; kn->kn_data = 0; } } } void klist_init(struct klist *klist, const struct klistops *ops, void *arg) { SLIST_INIT(&klist->kl_list); klist->kl_ops = ops; klist->kl_arg = arg; } void klist_free(struct klist *klist) { KASSERT(SLIST_EMPTY(&klist->kl_list)); } void klist_insert(struct klist *klist, struct knote *kn) { int ls; ls = klist_lock(klist); SLIST_INSERT_HEAD(&klist->kl_list, kn, kn_selnext); klist_unlock(klist, ls); } void klist_insert_locked(struct klist *klist, struct knote *kn) { KLIST_ASSERT_LOCKED(klist); SLIST_INSERT_HEAD(&klist->kl_list, kn, kn_selnext); } void klist_remove(struct klist *klist, struct knote *kn) { int ls; ls = klist_lock(klist); SLIST_REMOVE(&klist->kl_list, kn, knote, kn_selnext); klist_unlock(klist, ls); } void klist_remove_locked(struct klist *klist, struct knote *kn) { KLIST_ASSERT_LOCKED(klist); SLIST_REMOVE(&klist->kl_list, kn, knote, kn_selnext); } int klist_empty(struct klist *klist) { return (SLIST_EMPTY(&klist->kl_list)); } /* * Detach all knotes from klist. The knotes are rewired to indicate EOF. * * The caller of this function must not hold any locks that can block * filterops callbacks that run with KN_PROCESSING. * Otherwise this function might deadlock. */ void klist_invalidate(struct klist *list) { struct knote *kn; struct proc *p = curproc; int ls, s; NET_ASSERT_UNLOCKED(); s = splhigh(); ls = klist_lock(list); while ((kn = SLIST_FIRST(&list->kl_list)) != NULL) { if (!knote_acquire(kn, list, ls)) { /* knote_acquire() has unlocked list. */ ls = klist_lock(list); continue; } klist_unlock(list, ls); splx(s); filter_detach(kn); if (kn->kn_fop->f_flags & FILTEROP_ISFD) { kn->kn_fop = &dead_filtops; filter_event(kn, 0); knote_activate(kn); s = splhigh(); knote_release(kn); } else { knote_drop(kn, p); s = splhigh(); } ls = klist_lock(list); } klist_unlock(list, ls); splx(s); } static int klist_lock(struct klist *list) { int ls = 0; if (list->kl_ops != NULL) { ls = list->kl_ops->klo_lock(list->kl_arg); } else { KERNEL_LOCK(); ls = splhigh(); } return ls; } static void klist_unlock(struct klist *list, int ls) { if (list->kl_ops != NULL) { list->kl_ops->klo_unlock(list->kl_arg, ls); } else { splx(ls); KERNEL_UNLOCK(); } } static void klist_mutex_assertlk(void *arg) { struct mutex *mtx = arg; (void)mtx; MUTEX_ASSERT_LOCKED(mtx); } static int klist_mutex_lock(void *arg) { struct mutex *mtx = arg; mtx_enter(mtx); return 0; } static void klist_mutex_unlock(void *arg, int s) { struct mutex *mtx = arg; mtx_leave(mtx); } static const struct klistops mutex_klistops = { .klo_assertlk = klist_mutex_assertlk, .klo_lock = klist_mutex_lock, .klo_unlock = klist_mutex_unlock, }; void klist_init_mutex(struct klist *klist, struct mutex *mtx) { klist_init(klist, &mutex_klistops, mtx); } static void klist_rwlock_assertlk(void *arg) { struct rwlock *rwl = arg; (void)rwl; rw_assert_wrlock(rwl); } static int klist_rwlock_lock(void *arg) { struct rwlock *rwl = arg; rw_enter_write(rwl); return 0; } static void klist_rwlock_unlock(void *arg, int s) { struct rwlock *rwl = arg; rw_exit_write(rwl); } static const struct klistops rwlock_klistops = { .klo_assertlk = klist_rwlock_assertlk, .klo_lock = klist_rwlock_lock, .klo_unlock = klist_rwlock_unlock, }; void klist_init_rwlock(struct klist *klist, struct rwlock *rwl) { klist_init(klist, &rwlock_klistops, rwl); }
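/*
 * Illustrative sketch (hypothetical driver, for illustration only): how a
 * subsystem could use the mutex-backed klist helpers above to deliver
 * events to kqueue. "mydev_softc" and both functions are invented names;
 * the pattern is klist_init_mutex() at attach time and knote() with the
 * klist lock held when an event fires.
 */
struct mydev_softc {
	struct mutex	sc_mtx;
	struct klist	sc_klist;
};

void
mydev_klist_init(struct mydev_softc *sc)
{
	mtx_init(&sc->sc_mtx, IPL_MPFLOOR);
	klist_init_mutex(&sc->sc_klist, &sc->sc_mtx);
}

void
mydev_event(struct mydev_softc *sc)
{
	/*
	 * klist_init_mutex() made sc_mtx the klist lock, so holding it
	 * here satisfies KLIST_ASSERT_LOCKED() inside knote().
	 */
	mtx_enter(&sc->sc_mtx);
	knote(&sc->sc_klist, 0);
	mtx_leave(&sc->sc_mtx);
}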
/* $OpenBSD: trap.c,v 1.87 2020/10/22 13:41:51 deraadt Exp $ */ /* $NetBSD: trap.c,v 1.2 2003/05/04 23:51:56 fvdl Exp $ */ /*- * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Charles M. Hannum. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * the University of Utah, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)trap.c 7.4 (Berkeley) 5/13/91 */ /* * amd64 Trap and System call handling */ #undef TRAP_SIGDEBUG #include <sys/param.h> #include <sys/systm.h> #include <sys/proc.h> #include <sys/signalvar.h> #include <sys/user.h> #include <sys/signal.h> #include <sys/syscall.h> #include <sys/syscall_mi.h> #include <sys/stdarg.h> #include <uvm/uvm_extern.h> #include <machine/cpu.h> #include <machine/cpufunc.h> #include <machine/fpu.h> #include <machine/psl.h> #include <machine/trap.h> #ifdef DDB #include <machine/db_machdep.h> #endif #include "isa.h" int upageflttrap(struct trapframe *, uint64_t); int kpageflttrap(struct trapframe *, uint64_t); void kerntrap(struct trapframe *); void usertrap(struct trapframe *); void ast(struct trapframe *); void syscall(struct trapframe *); const char * const trap_type[] = { "privileged instruction fault", /* 0 T_PRIVINFLT */ "breakpoint trap", /* 1 T_BPTFLT */ "arithmetic trap", /* 2 T_ARITHTRAP */ "reserved trap", /* 3 T_RESERVED */ "protection fault", /* 4 T_PROTFLT */ "trace trap", /* 5 T_TRCTRAP */ "page fault", /* 6 T_PAGEFLT */ "alignment fault", /* 7 T_ALIGNFLT */ "integer divide fault", /* 8 T_DIVIDE */ "non-maskable interrupt", /* 9 T_NMI */ "overflow trap", /* 10 T_OFLOW */ "bounds check fault", /* 11 T_BOUND */ "FPU not available fault", /* 12 T_DNA */ "double fault", /* 13 T_DOUBLEFLT */ "FPU operand fetch fault", /* 14 T_FPOPFLT */ "invalid TSS fault", /* 15 T_TSSFLT */ "segment not present fault", /* 16 T_SEGNPFLT */ "stack fault", /* 17 T_STKFLT */ "machine check", /* 18 T_MCA */ "SSE FP exception", /* 19 T_XMM */ }; const int trap_types = nitems(trap_type); #ifdef DEBUG int trapdebug = 0; #endif static void trap_print(struct trapframe *, int _type); static inline void frame_dump(struct trapframe *_tf, struct proc *_p, const char *_sig, uint64_t _cr2); static inline void verify_smap(const char *_func); static inline void debug_trap(struct trapframe *_frame, struct proc *_p, long _type); static inline void fault(const char *format, ...) { static char faultbuf[512]; va_list ap; /* * Save the fault info for DDB. Kernel lock protects * faultbuf from being overwritten by another CPU. */ va_start(ap, format); vsnprintf(faultbuf, sizeof faultbuf, format, ap); va_end(ap); printf("%s\n", faultbuf); faultstr = faultbuf; } static inline int pgex2access(int pgex) { if (pgex & PGEX_W) return PROT_WRITE; else if (pgex & PGEX_I) return PROT_EXEC; return PROT_READ; } /* * upageflttrap(frame, usermode): page fault handler * Returns non-zero if the fault was handled (possibly by generating * a signal). Returns zero, possibly still holding the kernel lock, * if something was so broken that we should panic. */ int upageflttrap(struct trapframe *frame, uint64_t cr2) { struct proc *p = curproc; vaddr_t va = trunc_page((vaddr_t)cr2); vm_prot_t access_type = pgex2access(frame->tf_err); union sigval sv; int signal, sicode, error; KERNEL_LOCK(); error = uvm_fault(&p->p_vmspace->vm_map, va, 0, access_type); KERNEL_UNLOCK(); if (error == 0) { uvm_grow(p, va); return 1; } signal = SIGSEGV; sicode = SEGV_MAPERR; if (error == ENOMEM) { printf("UVM: pid %d (%s), uid %d killed:" " out of swap\n", p->p_p->ps_pid, p->p_p->ps_comm, p->p_ucred ? 
(int)p->p_ucred->cr_uid : -1); signal = SIGKILL; } else { if (error == EACCES) sicode = SEGV_ACCERR; else if (error == EIO) { signal = SIGBUS; sicode = BUS_OBJERR; } } sv.sival_ptr = (void *)cr2; trapsignal(p, signal, T_PAGEFLT, sicode, sv); return 1; } /* * kpageflttrap(frame, usermode): page fault handler * Returns non-zero if the fault was handled (possibly by generating * a signal). Returns zero, possibly still holding the kernel lock, * if something was so broken that we should panic. */ int kpageflttrap(struct trapframe *frame, uint64_t cr2) { struct proc *p = curproc; struct pcb *pcb; vaddr_t va = trunc_page((vaddr_t)cr2); struct vm_map *map; vm_prot_t access_type = pgex2access(frame->tf_err); caddr_t onfault; int error; if (p == NULL || p->p_addr == NULL || p->p_vmspace == NULL) return 0; pcb = &p->p_addr->u_pcb; /* This will only trigger if SMEP is enabled */ if (cr2 <= VM_MAXUSER_ADDRESS && frame->tf_err & PGEX_I) { KERNEL_LOCK(); fault("attempt to execute user address %p " "in supervisor mode", (void *)cr2); /* retain kernel lock */ return 0; } /* This will only trigger if SMAP is enabled */ if (pcb->pcb_onfault == NULL && cr2 <= VM_MAXUSER_ADDRESS && frame->tf_err & PGEX_P) { KERNEL_LOCK(); fault("attempt to access user address %p " "in supervisor mode", (void *)cr2); /* retain kernel lock */ return 0; } /* * It is only a kernel address space fault iff: * 1. when running in ring 0 and * 2. pcb_onfault not set or * 3. pcb_onfault set but supervisor space fault * The last can occur during an exec() copyin where the * argument space is lazy-allocated. */ map = &p->p_vmspace->vm_map; if (va >= VM_MIN_KERNEL_ADDRESS) map = kernel_map; if (curcpu()->ci_inatomic == 0 || map == kernel_map) { onfault = pcb->pcb_onfault; pcb->pcb_onfault = NULL; KERNEL_LOCK(); error = uvm_fault(map, va, 0, access_type); KERNEL_UNLOCK(); pcb->pcb_onfault = onfault; if (error == 0 && map != kernel_map) uvm_grow(p, va); } else error = EFAULT; if (error) { if (pcb->pcb_onfault == NULL) { /* bad memory access in the kernel */ KERNEL_LOCK(); fault("uvm_fault(%p, 0x%llx, 0, %d) -> %x", map, cr2, access_type, error); /* retain kernel lock */ return 0; } frame->tf_rip = (u_int64_t)pcb->pcb_onfault; } return 1; } /* * kerntrap(frame): * Exception, fault, and trap interface to BSD kernel. This * common code is called from assembly language IDT gate entry * routines that prepare a suitable stack frame, and restore this * frame after the exception has been processed. */ void kerntrap(struct trapframe *frame) { int type = (int)frame->tf_trapno; uint64_t cr2 = rcr2(); verify_smap(__func__); uvmexp.traps++; debug_trap(frame, curproc, type); switch (type) { default: we_re_toast: #ifdef DDB if (db_ktrap(type, 0, frame)) return; #endif trap_print(frame, type); panic("trap type %d, code=%llx, pc=%llx", type, frame->tf_err, frame->tf_rip); /*NOTREACHED*/ case T_PAGEFLT: /* allow page faults in kernel mode */ if (kpageflttrap(frame, cr2)) return; goto we_re_toast; #if NISA > 0 case T_NMI: #ifdef DDB /* NMI can be hooked up to a pushbutton for debugging */ printf ("NMI ... going to debugger\n"); if (db_ktrap(type, 0, frame)) return; #endif /* machine/parity/power fail/"kitchen sink" faults */ if (x86_nmi() != 0) goto we_re_toast; else return; #endif /* NISA > 0 */ } } /* * usertrap(frame): handler for exceptions, faults, and traps from userspace * This is called from the assembly language IDT gate entries * which prepare a suitable stack frame and restores the CPU state * after the fault has been processed. 
*/ void usertrap(struct trapframe *frame) { struct proc *p = curproc; int type = (int)frame->tf_trapno; uint64_t cr2 = rcr2(); union sigval sv; int sig, code; verify_smap(__func__); uvmexp.traps++; debug_trap(frame, p, type); p->p_md.md_regs = frame; refreshcreds(p); switch (type) { case T_TSSFLT: sig = SIGBUS; code = BUS_OBJERR; break; case T_PROTFLT: /* protection fault */ case T_SEGNPFLT: case T_STKFLT: frame_dump(frame, p, "SEGV", 0); sig = SIGSEGV; code = SEGV_MAPERR; break; case T_ALIGNFLT: sig = SIGBUS; code = BUS_ADRALN; break; case T_PRIVINFLT: /* privileged instruction fault */ sig = SIGILL; code = ILL_PRVOPC; break; case T_DIVIDE: sig = SIGFPE; code = FPE_INTDIV; break; case T_ARITHTRAP: case T_XMM: /* real arithmetic exceptions */ sig = SIGFPE; code = fputrap(type); break; case T_BPTFLT: /* bpt instruction fault */ case T_TRCTRAP: /* trace trap */ sig = SIGTRAP; code = TRAP_BRKPT; break; case T_PAGEFLT: /* page fault */ if (!uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p), "[%s]%d/%d sp=%lx inside %lx-%lx: not MAP_STACK\n", uvm_map_inentry_sp, p->p_vmspace->vm_map.sserial)) goto out; if (upageflttrap(frame, cr2)) goto out; /* FALLTHROUGH */ default: trap_print(frame, type); panic("impossible trap"); } sv.sival_ptr = (void *)frame->tf_rip; trapsignal(p, sig, type, code, sv); out: userret(p); } static void trap_print(struct trapframe *frame, int type) { if (type < trap_types) printf("fatal %s", trap_type[type]); else printf("unknown trap %d", type); printf(" in %s mode\n", KERNELMODE(frame->tf_cs, frame->tf_rflags) ? "supervisor" : "user"); printf("trap type %d code %llx rip %llx cs %llx rflags %llx cr2 " "%llx cpl %x rsp %llx\n", type, frame->tf_err, frame->tf_rip, frame->tf_cs, frame->tf_rflags, rcr2(), curcpu()->ci_ilevel, frame->tf_rsp); printf("gsbase %p kgsbase %p\n", (void *)rdmsr(MSR_GSBASE), (void *)rdmsr(MSR_KERNELGSBASE)); } static inline void frame_dump(struct trapframe *tf, struct proc *p, const char *sig, uint64_t cr2) { #ifdef TRAP_SIGDEBUG printf("pid %d (%s): %s at rip %llx addr %llx\n", p->p_p->ps_pid, p->p_p->ps_comm, sig, tf->tf_rip, cr2); printf("rip %p cs 0x%x rfl %p rsp %p ss 0x%x\n", (void *)tf->tf_rip, (unsigned)tf->tf_cs & 0xffff, (void *)tf->tf_rflags, (void *)tf->tf_rsp, (unsigned)tf->tf_ss & 0xffff); printf("err 0x%llx trapno 0x%llx\n", tf->tf_err, tf->tf_trapno); printf("rdi %p rsi %p rdx %p\n", (void *)tf->tf_rdi, (void *)tf->tf_rsi, (void *)tf->tf_rdx); printf("rcx %p r8 %p r9 %p\n", (void *)tf->tf_rcx, (void *)tf->tf_r8, (void *)tf->tf_r9); printf("r10 %p r11 %p r12 %p\n", (void *)tf->tf_r10, (void *)tf->tf_r11, (void *)tf->tf_r12); printf("r13 %p r14 %p r15 %p\n", (void *)tf->tf_r13, (void *)tf->tf_r14, (void *)tf->tf_r15); printf("rbp %p rbx %p rax %p\n", (void *)tf->tf_rbp, (void *)tf->tf_rbx, (void *)tf->tf_rax); #endif } static inline void verify_smap(const char *func) { #ifdef DIAGNOSTIC if (curcpu()->ci_feature_sefflags_ebx & SEFF0EBX_SMAP) { u_long rf = read_rflags(); if (rf & PSL_AC) { write_rflags(rf & ~PSL_AC); panic("%s: AC set on entry", func); } } #endif } static inline void debug_trap(struct trapframe *frame, struct proc *p, long type) { #ifdef DEBUG if (trapdebug) { printf("trap %ld code %llx rip %llx cs %llx rflags %llx " "cr2 %llx cpl %x\n", type, frame->tf_err, frame->tf_rip, frame->tf_cs, frame->tf_rflags, rcr2(), curcpu()->ci_ilevel); printf("curproc %p\n", (void *)p); if (p != NULL) printf("pid %d\n", p->p_p->ps_pid); } #endif } /* * ast(frame): * AST handler. 
This is called from assembly language stubs when * returning to userspace after a syscall or interrupt. */ void ast(struct trapframe *frame) { struct proc *p = curproc; uvmexp.traps++; KASSERT(!KERNELMODE(frame->tf_cs, frame->tf_rflags)); p->p_md.md_regs = frame; refreshcreds(p); uvmexp.softs++; mi_ast(p, curcpu()->ci_want_resched); userret(p); } /* * syscall(frame): * System call request from POSIX system call gate interface to kernel. */ void syscall(struct trapframe *frame) { caddr_t params; const struct sysent *callp; struct proc *p; int error; int nsys; size_t argsize, argoff; register_t code, args[9], rval[2], *argp; verify_smap(__func__); uvmexp.syscalls++; p = curproc; code = frame->tf_rax; callp = p->p_p->ps_emul->e_sysent; nsys = p->p_p->ps_emul->e_nsysent; argp = &args[0]; argoff = 0; switch (code) { case SYS_syscall: case SYS___syscall: /* * Code is first argument, followed by actual args. */ code = frame->tf_rdi; argp = &args[1]; argoff = 1; break; default: break; } if (code < 0 || code >= nsys) callp += p->p_p->ps_emul->e_nosys; else callp += code; argsize = (callp->sy_argsize >> 3) + argoff; if (argsize) { switch (MIN(argsize, 6)) { case 6: args[5] = frame->tf_r9; case 5: args[4] = frame->tf_r8; case 4: args[3] = frame->tf_r10; case 3: args[2] = frame->tf_rdx; case 2: args[1] = frame->tf_rsi; case 1: args[0] = frame->tf_rdi; break; default: panic("impossible syscall argsize"); } if (argsize > 6) { argsize -= 6; params = (caddr_t)frame->tf_rsp + sizeof(register_t); if ((error = copyin(params, &args[6], argsize << 3))) goto bad; } } rval[0] = 0; rval[1] = frame->tf_rdx; error = mi_syscall(p, code, callp, argp, rval); switch (error) { case 0: frame->tf_rax = rval[0]; frame->tf_rdx = rval[1]; frame->tf_rflags &= ~PSL_C; /* carry bit */ break; case ERESTART: /* Back up over the syscall instruction (2 bytes) */ frame->tf_rip -= 2; break; case EJUSTRETURN: /* nothing to do */ break; default: bad: frame->tf_rax = error; frame->tf_rflags |= PSL_C; /* carry bit */ break; } mi_syscall_return(p, code, error, rval); } void child_return(void *arg) { struct proc *p = arg; struct trapframe *tf = p->p_md.md_regs; tf->tf_rax = 0; tf->tf_rdx = 1; tf->tf_rflags &= ~PSL_C; KERNEL_UNLOCK(); mi_child_return(p); }
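/*
 * Illustration only; not part of the trap.c code above.  A minimal,
 * standalone sketch of the argument-gathering pattern used by syscall():
 * up to six system-call arguments arrive in registers (%rdi, %rsi, %rdx,
 * %r10, %r8, %r9, in that order) and are copied with a fall-through
 * switch, highest index first; any further arguments are fetched from the
 * user stack with copyin().  All names below are hypothetical.
 */
#include <stddef.h>

static void
example_gather_args(const long regs[6], long *args, size_t argcount)
{
	switch (argcount > 6 ? 6 : argcount) {
	case 6: args[5] = regs[5];	/* %r9  */
	case 5: args[4] = regs[4];	/* %r8  */
	case 4: args[3] = regs[3];	/* %r10 (%rcx is clobbered by syscall) */
	case 3: args[2] = regs[2];	/* %rdx */
	case 2: args[1] = regs[1];	/* %rsi */
	case 1: args[0] = regs[0];	/* %rdi */
	case 0: break;
	}
	/* Arguments 7..n, if present, would be copied in from the user stack. */
}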
/************************************************************************** Copyright (c) 2001-2005, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ***************************************************************************/ /* $OpenBSD: if_ixgb.c,v 1.72 2020/07/10 13:26:38 patrick Exp $ */ #include <dev/pci/if_ixgb.h> #ifdef IXGB_DEBUG /********************************************************************* * Set this to one to display debug statistics *********************************************************************/ int ixgb_display_debug_stats = 0; #endif /********************************************************************* * Driver version *********************************************************************/ #define IXGB_DRIVER_VERSION "6.1.0" /********************************************************************* * PCI Device ID Table *********************************************************************/ const struct pci_matchid ixgb_devices[] = { { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82597EX }, { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82597EX_SR }, { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82597EX_LR }, { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82597EX_CX4 }, }; /********************************************************************* * Function prototypes *********************************************************************/ int ixgb_probe(struct device *, void *, void *); void ixgb_attach(struct device *, struct device *, void *); int ixgb_intr(void *); void ixgb_start(struct ifnet *); int ixgb_ioctl(struct ifnet *, u_long, caddr_t); void ixgb_watchdog(struct ifnet *); void ixgb_init(void *); void ixgb_stop(void *); void ixgb_media_status(struct ifnet *, struct ifmediareq *); int ixgb_media_change(struct ifnet *); void ixgb_identify_hardware(struct ixgb_softc *); int ixgb_allocate_pci_resources(struct ixgb_softc *); void ixgb_free_pci_resources(struct ixgb_softc *); void ixgb_local_timer(void *); int ixgb_hardware_init(struct ixgb_softc *); void ixgb_setup_interface(struct ixgb_softc *); int ixgb_setup_transmit_structures(struct ixgb_softc *); void ixgb_initialize_transmit_unit(struct ixgb_softc *); 
int ixgb_setup_receive_structures(struct ixgb_softc *); void ixgb_initialize_receive_unit(struct ixgb_softc *); void ixgb_enable_intr(struct ixgb_softc *); void ixgb_disable_intr(struct ixgb_softc *); void ixgb_free_transmit_structures(struct ixgb_softc *); void ixgb_free_receive_structures(struct ixgb_softc *); void ixgb_update_stats_counters(struct ixgb_softc *); void ixgb_txeof(struct ixgb_softc *); int ixgb_allocate_receive_structures(struct ixgb_softc *); int ixgb_allocate_transmit_structures(struct ixgb_softc *); void ixgb_rxeof(struct ixgb_softc *, int); void ixgb_receive_checksum(struct ixgb_softc *, struct ixgb_rx_desc * rx_desc, struct mbuf *); void ixgb_transmit_checksum_setup(struct ixgb_softc *, struct mbuf *, u_int8_t *); void ixgb_set_promisc(struct ixgb_softc *); void ixgb_set_multi(struct ixgb_softc *); #ifdef IXGB_DEBUG void ixgb_print_hw_stats(struct ixgb_softc *); #endif void ixgb_update_link_status(struct ixgb_softc *); int ixgb_get_buf(struct ixgb_softc *, int i, struct mbuf *); void ixgb_enable_hw_vlans(struct ixgb_softc *); int ixgb_encap(struct ixgb_softc *, struct mbuf *); int ixgb_dma_malloc(struct ixgb_softc *, bus_size_t, struct ixgb_dma_alloc *, int); void ixgb_dma_free(struct ixgb_softc *, struct ixgb_dma_alloc *); /********************************************************************* * OpenBSD Device Interface Entry Points *********************************************************************/ struct cfattach ixgb_ca = { sizeof(struct ixgb_softc), ixgb_probe, ixgb_attach }; struct cfdriver ixgb_cd = { NULL, "ixgb", DV_IFNET }; /* some defines for controlling descriptor fetches in h/w */ #define RXDCTL_PTHRESH_DEFAULT 0 /* chip considers prefech below this */ #define RXDCTL_HTHRESH_DEFAULT 0 /* chip will only prefetch if tail is * pushed this many descriptors from * head */ #define RXDCTL_WTHRESH_DEFAULT 0 /* chip writes back at this many or RXT0 */ /********************************************************************* * Device identification routine * * ixgb_probe determines if the driver should be loaded on * adapter based on PCI vendor/device id of the adapter. * * return 0 on no match, positive on match *********************************************************************/ int ixgb_probe(struct device *parent, void *match, void *aux) { INIT_DEBUGOUT("ixgb_probe: begin"); return (pci_matchbyid((struct pci_attach_args *)aux, ixgb_devices, nitems(ixgb_devices))); } /********************************************************************* * Device initialization routine * * The attach entry point is called when the driver is being loaded. * This routine identifies the type of hardware, allocates all resources * and initializes the hardware. * *********************************************************************/ void ixgb_attach(struct device *parent, struct device *self, void *aux) { struct pci_attach_args *pa = aux; struct ixgb_softc *sc; int tsize, rsize; INIT_DEBUGOUT("ixgb_attach: begin"); sc = (struct ixgb_softc *)self; sc->osdep.ixgb_pa = *pa; timeout_set(&sc->timer_handle, ixgb_local_timer, sc); /* Determine hardware revision */ ixgb_identify_hardware(sc); /* Parameters (to be read from user) */ sc->num_tx_desc = IXGB_MAX_TXD; sc->num_rx_desc = IXGB_MAX_RXD; sc->tx_int_delay = TIDV; sc->rx_int_delay = RDTR; sc->rx_buffer_len = IXGB_RXBUFFER_2048; /* * These parameters control the automatic generation(Tx) and * response(Rx) to Ethernet PAUSE frames. 
*/ sc->hw.fc.high_water = FCRTH; sc->hw.fc.low_water = FCRTL; sc->hw.fc.pause_time = FCPAUSE; sc->hw.fc.send_xon = TRUE; sc->hw.fc.type = FLOW_CONTROL; /* Set the max frame size assuming standard ethernet sized frames */ sc->hw.max_frame_size = IXGB_MAX_JUMBO_FRAME_SIZE; if (ixgb_allocate_pci_resources(sc)) goto err_pci; tsize = IXGB_ROUNDUP(sc->num_tx_desc * sizeof(struct ixgb_tx_desc), IXGB_MAX_TXD * sizeof(struct ixgb_tx_desc)); tsize = IXGB_ROUNDUP(tsize, PAGE_SIZE); /* Allocate Transmit Descriptor ring */ if (ixgb_dma_malloc(sc, tsize, &sc->txdma, BUS_DMA_NOWAIT)) { printf("%s: Unable to allocate TxDescriptor memory\n", sc->sc_dv.dv_xname); goto err_tx_desc; } sc->tx_desc_base = (struct ixgb_tx_desc *) sc->txdma.dma_vaddr; rsize = IXGB_ROUNDUP(sc->num_rx_desc * sizeof(struct ixgb_rx_desc), IXGB_MAX_RXD * sizeof(struct ixgb_rx_desc)); rsize = IXGB_ROUNDUP(rsize, PAGE_SIZE); /* Allocate Receive Descriptor ring */ if (ixgb_dma_malloc(sc, rsize, &sc->rxdma, BUS_DMA_NOWAIT)) { printf("%s: Unable to allocate rx_desc memory\n", sc->sc_dv.dv_xname); goto err_rx_desc; } sc->rx_desc_base = (struct ixgb_rx_desc *) sc->rxdma.dma_vaddr; /* Initialize the hardware */ if (ixgb_hardware_init(sc)) { printf("%s: Unable to initialize the hardware\n", sc->sc_dv.dv_xname); goto err_hw_init; } /* Setup OS specific network interface */ ixgb_setup_interface(sc); /* Initialize statistics */ ixgb_clear_hw_cntrs(&sc->hw); ixgb_update_stats_counters(sc); ixgb_update_link_status(sc); printf(", address %s\n", ether_sprintf(sc->interface_data.ac_enaddr)); INIT_DEBUGOUT("ixgb_attach: end"); return; err_hw_init: ixgb_dma_free(sc, &sc->rxdma); err_rx_desc: ixgb_dma_free(sc, &sc->txdma); err_tx_desc: err_pci: ixgb_free_pci_resources(sc); } /********************************************************************* * Transmit entry point * * ixgb_start is called by the stack to initiate a transmit. * The driver will remain in this routine as long as there are * packets to transmit and transmit resources are available. * In case resources are not available stack is notified and * the packet is requeued. **********************************************************************/ void ixgb_start(struct ifnet *ifp) { struct mbuf *m_head; struct ixgb_softc *sc = ifp->if_softc; int post = 0; if (!(ifp->if_flags & IFF_RUNNING) || ifq_is_oactive(&ifp->if_snd)) return; if (!sc->link_active) return; bus_dmamap_sync(sc->txdma.dma_tag, sc->txdma.dma_map, 0, sc->txdma.dma_map->dm_mapsize, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); for (;;) { m_head = ifq_deq_begin(&ifp->if_snd); if (m_head == NULL) break; if (ixgb_encap(sc, m_head)) { ifq_deq_rollback(&ifp->if_snd, m_head); ifq_set_oactive(&ifp->if_snd); break; } ifq_deq_commit(&ifp->if_snd, m_head); #if NBPFILTER > 0 /* Send a copy of the frame to the BPF listener */ if (ifp->if_bpf) bpf_mtap_ether(ifp->if_bpf, m_head, BPF_DIRECTION_OUT); #endif /* Set timeout in case hardware has problems transmitting */ ifp->if_timer = IXGB_TX_TIMEOUT; post = 1; } bus_dmamap_sync(sc->txdma.dma_tag, sc->txdma.dma_map, 0, sc->txdma.dma_map->dm_mapsize, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* * Advance the Transmit Descriptor Tail (Tdt), * this tells the E1000 that this frame * is available to transmit. */ if (post) IXGB_WRITE_REG(&sc->hw, TDT, sc->next_avail_tx_desc); } /********************************************************************* * Ioctl entry point * * ixgb_ioctl is called when the user wants to configure the * interface. 
* * return 0 on success, positive on failure **********************************************************************/ int ixgb_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct ixgb_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *) data; int s, error = 0; s = splnet(); switch (command) { case SIOCSIFADDR: IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFADDR (Set Interface " "Addr)"); ifp->if_flags |= IFF_UP; if (!(ifp->if_flags & IFF_RUNNING)) ixgb_init(sc); break; case SIOCSIFFLAGS: IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)"); if (ifp->if_flags & IFF_UP) { /* * If only the PROMISC or ALLMULTI flag changes, then * don't do a full re-init of the chip, just update * the Rx filter. */ if ((ifp->if_flags & IFF_RUNNING) && ((ifp->if_flags ^ sc->if_flags) & (IFF_ALLMULTI | IFF_PROMISC)) != 0) { ixgb_set_promisc(sc); } else { if (!(ifp->if_flags & IFF_RUNNING)) ixgb_init(sc); } } else { if (ifp->if_flags & IFF_RUNNING) ixgb_stop(sc); } sc->if_flags = ifp->if_flags; break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)"); error = ifmedia_ioctl(ifp, ifr, &sc->media, command); break; default: error = ether_ioctl(ifp, &sc->interface_data, command, data); } if (error == ENETRESET) { if (ifp->if_flags & IFF_RUNNING) { ixgb_disable_intr(sc); ixgb_set_multi(sc); ixgb_enable_intr(sc); } error = 0; } splx(s); return (error); } /********************************************************************* * Watchdog entry point * * This routine is called whenever hardware quits transmitting. * **********************************************************************/ void ixgb_watchdog(struct ifnet * ifp) { struct ixgb_softc *sc = ifp->if_softc; /* * If we are in this routine because of pause frames, then don't * reset the hardware. */ if (IXGB_READ_REG(&sc->hw, STATUS) & IXGB_STATUS_TXOFF) { ifp->if_timer = IXGB_TX_TIMEOUT; return; } printf("%s: watchdog timeout -- resetting\n", sc->sc_dv.dv_xname); ixgb_init(sc); sc->watchdog_events++; } /********************************************************************* * Init entry point * * This routine is used in two ways. It is used by the stack as * init entry point in network interface structure. It is also used * by the driver as a hw/sw initialization routine to get to a * consistent state. 
* **********************************************************************/ void ixgb_init(void *arg) { struct ixgb_softc *sc = arg; struct ifnet *ifp = &sc->interface_data.ac_if; uint32_t temp_reg; int s; INIT_DEBUGOUT("ixgb_init: begin"); s = splnet(); ixgb_stop(sc); /* Get the latest mac address, User can use a LAA */ bcopy(sc->interface_data.ac_enaddr, sc->hw.curr_mac_addr, IXGB_ETH_LENGTH_OF_ADDRESS); /* Initialize the hardware */ if (ixgb_hardware_init(sc)) { printf("%s: Unable to initialize the hardware\n", sc->sc_dv.dv_xname); splx(s); return; } if (ifp->if_capabilities & IFCAP_VLAN_HWTAGGING) ixgb_enable_hw_vlans(sc); /* Prepare transmit descriptors and buffers */ if (ixgb_setup_transmit_structures(sc)) { printf("%s: Could not setup transmit structures\n", sc->sc_dv.dv_xname); ixgb_stop(sc); splx(s); return; } ixgb_initialize_transmit_unit(sc); /* Setup Multicast table */ ixgb_set_multi(sc); /* Prepare receive descriptors and buffers */ if (ixgb_setup_receive_structures(sc)) { printf("%s: Could not setup receive structures\n", sc->sc_dv.dv_xname); ixgb_stop(sc); splx(s); return; } ixgb_initialize_receive_unit(sc); /* Don't lose promiscuous settings */ ixgb_set_promisc(sc); ifp->if_flags |= IFF_RUNNING; ifq_clr_oactive(&ifp->if_snd); /* Enable jumbo frames */ IXGB_WRITE_REG(&sc->hw, MFRMS, sc->hw.max_frame_size << IXGB_MFRMS_SHIFT); temp_reg = IXGB_READ_REG(&sc->hw, CTRL0); temp_reg |= IXGB_CTRL0_JFE; IXGB_WRITE_REG(&sc->hw, CTRL0, temp_reg); timeout_add_sec(&sc->timer_handle, 1); ixgb_clear_hw_cntrs(&sc->hw); ixgb_enable_intr(sc); splx(s); } /********************************************************************* * * Interrupt Service routine * **********************************************************************/ int ixgb_intr(void *arg) { struct ixgb_softc *sc = arg; struct ifnet *ifp; u_int32_t reg_icr; boolean_t rxdmt0 = FALSE; int claimed = 0; ifp = &sc->interface_data.ac_if; for (;;) { reg_icr = IXGB_READ_REG(&sc->hw, ICR); if (reg_icr == 0) break; claimed = 1; if (reg_icr & IXGB_INT_RXDMT0) rxdmt0 = TRUE; if (ifp->if_flags & IFF_RUNNING) { ixgb_rxeof(sc, -1); ixgb_txeof(sc); } /* Link status change */ if (reg_icr & (IXGB_INT_RXSEQ | IXGB_INT_LSC)) { timeout_del(&sc->timer_handle); ixgb_check_for_link(&sc->hw); ixgb_update_link_status(sc); timeout_add_sec(&sc->timer_handle, 1); } if (rxdmt0 && sc->raidc) { IXGB_WRITE_REG(&sc->hw, IMC, IXGB_INT_RXDMT0); IXGB_WRITE_REG(&sc->hw, IMS, IXGB_INT_RXDMT0); } } if (ifp->if_flags & IFF_RUNNING && !ifq_empty(&ifp->if_snd)) ixgb_start(ifp); return (claimed); } /********************************************************************* * * Media Ioctl callback * * This routine is called whenever the user queries the status of * the interface using ifconfig. 
* **********************************************************************/ void ixgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) { struct ixgb_softc *sc = ifp->if_softc; INIT_DEBUGOUT("ixgb_media_status: begin"); ixgb_check_for_link(&sc->hw); ixgb_update_link_status(sc); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!sc->hw.link_up) { ifmr->ifm_active |= IFM_NONE; return; } ifmr->ifm_status |= IFM_ACTIVE; if ((sc->hw.phy_type == ixgb_phy_type_g6104) || (sc->hw.phy_type == ixgb_phy_type_txn17401)) ifmr->ifm_active |= IFM_10G_LR | IFM_FDX; else ifmr->ifm_active |= IFM_10G_SR | IFM_FDX; return; } /********************************************************************* * * Media Ioctl callback * * This routine is called when the user changes speed/duplex using * media/mediopt option with ifconfig. * **********************************************************************/ int ixgb_media_change(struct ifnet * ifp) { struct ixgb_softc *sc = ifp->if_softc; struct ifmedia *ifm = &sc->media; INIT_DEBUGOUT("ixgb_media_change: begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); return (0); } /********************************************************************* * * This routine maps the mbufs to tx descriptors. * * return 0 on success, positive on failure **********************************************************************/ int ixgb_encap(struct ixgb_softc *sc, struct mbuf *m_head) { u_int8_t txd_popts; int i, j, error = 0; bus_dmamap_t map; struct ixgb_buffer *tx_buffer; struct ixgb_tx_desc *current_tx_desc = NULL; /* * Force a cleanup if number of TX descriptors available hits the * threshold */ if (sc->num_tx_desc_avail <= IXGB_TX_CLEANUP_THRESHOLD) { ixgb_txeof(sc); /* Now do we at least have a minimal? */ if (sc->num_tx_desc_avail <= IXGB_TX_CLEANUP_THRESHOLD) { sc->no_tx_desc_avail1++; return (ENOBUFS); } } /* * Map the packet for DMA. 
*/ tx_buffer = &sc->tx_buffer_area[sc->next_avail_tx_desc]; map = tx_buffer->map; error = bus_dmamap_load_mbuf(sc->txtag, map, m_head, BUS_DMA_NOWAIT); if (error != 0) { sc->no_tx_dma_setup++; return (error); } IXGB_KASSERT(map->dm_nsegs != 0, ("ixgb_encap: empty packet")); if (map->dm_nsegs > sc->num_tx_desc_avail) goto fail; #ifdef IXGB_CSUM_OFFLOAD ixgb_transmit_checksum_setup(sc, m_head, &txd_popts); #else txd_popts = 0; #endif i = sc->next_avail_tx_desc; for (j = 0; j < map->dm_nsegs; j++) { tx_buffer = &sc->tx_buffer_area[i]; current_tx_desc = &sc->tx_desc_base[i]; current_tx_desc->buff_addr = htole64(map->dm_segs[j].ds_addr); current_tx_desc->cmd_type_len = htole32((sc->txd_cmd | map->dm_segs[j].ds_len)); current_tx_desc->popts = txd_popts; if (++i == sc->num_tx_desc) i = 0; tx_buffer->m_head = NULL; } sc->num_tx_desc_avail -= map->dm_nsegs; sc->next_avail_tx_desc = i; /* Find out if we are in VLAN mode */ if (m_head->m_flags & M_VLANTAG) { /* Set the VLAN id */ current_tx_desc->vlan = htole16(m_head->m_pkthdr.ether_vtag); /* Tell hardware to add tag */ current_tx_desc->cmd_type_len |= htole32(IXGB_TX_DESC_CMD_VLE); } tx_buffer->m_head = m_head; bus_dmamap_sync(sc->txtag, map, 0, map->dm_mapsize, BUS_DMASYNC_PREWRITE); /* * Last Descriptor of Packet needs End Of Packet (EOP) */ current_tx_desc->cmd_type_len |= htole32(IXGB_TX_DESC_CMD_EOP); return (0); fail: sc->no_tx_desc_avail2++; bus_dmamap_unload(sc->txtag, map); return (ENOBUFS); } void ixgb_set_promisc(struct ixgb_softc *sc) { u_int32_t reg_rctl; struct ifnet *ifp = &sc->interface_data.ac_if; reg_rctl = IXGB_READ_REG(&sc->hw, RCTL); if (ifp->if_flags & IFF_PROMISC) { reg_rctl |= (IXGB_RCTL_UPE | IXGB_RCTL_MPE); } else if (ifp->if_flags & IFF_ALLMULTI) { reg_rctl |= IXGB_RCTL_MPE; reg_rctl &= ~IXGB_RCTL_UPE; } else { reg_rctl &= ~(IXGB_RCTL_UPE | IXGB_RCTL_MPE); } IXGB_WRITE_REG(&sc->hw, RCTL, reg_rctl); } /********************************************************************* * Multicast Update * * This routine is called whenever multicast address list is updated. * **********************************************************************/ void ixgb_set_multi(struct ixgb_softc *sc) { u_int32_t reg_rctl = 0; u_int8_t mta[MAX_NUM_MULTICAST_ADDRESSES * IXGB_ETH_LENGTH_OF_ADDRESS]; int mcnt = 0; struct ifnet *ifp = &sc->interface_data.ac_if; struct arpcom *ac = &sc->interface_data; struct ether_multi *enm; struct ether_multistep step; IOCTL_DEBUGOUT("ixgb_set_multi: begin"); if (ac->ac_multirangecnt > 0) { ifp->if_flags |= IFF_ALLMULTI; mcnt = MAX_NUM_MULTICAST_ADDRESSES; goto setit; } ETHER_FIRST_MULTI(step, ac, enm); while (enm != NULL) { if (mcnt == MAX_NUM_MULTICAST_ADDRESSES) break; bcopy(enm->enm_addrlo, &mta[mcnt*IXGB_ETH_LENGTH_OF_ADDRESS], IXGB_ETH_LENGTH_OF_ADDRESS); mcnt++; ETHER_NEXT_MULTI(step, enm); } setit: if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) { reg_rctl = IXGB_READ_REG(&sc->hw, RCTL); reg_rctl |= IXGB_RCTL_MPE; IXGB_WRITE_REG(&sc->hw, RCTL, reg_rctl); } else ixgb_mc_addr_list_update(&sc->hw, mta, mcnt, 0); } /********************************************************************* * Timer routine * * This routine checks for link status and updates statistics. 
* **********************************************************************/ void ixgb_local_timer(void *arg) { struct ifnet *ifp; struct ixgb_softc *sc = arg; int s; ifp = &sc->interface_data.ac_if; s = splnet(); ixgb_check_for_link(&sc->hw); ixgb_update_link_status(sc); ixgb_update_stats_counters(sc); #ifdef IXGB_DEBUG if (ixgb_display_debug_stats && ifp->if_flags & IFF_RUNNING) ixgb_print_hw_stats(sc); #endif timeout_add_sec(&sc->timer_handle, 1); splx(s); } void ixgb_update_link_status(struct ixgb_softc *sc) { struct ifnet *ifp = &sc->interface_data.ac_if; if (sc->hw.link_up) { if (!sc->link_active) { ifp->if_baudrate = IF_Gbps(10); sc->link_active = 1; ifp->if_link_state = LINK_STATE_FULL_DUPLEX; if_link_state_change(ifp); } } else { if (sc->link_active) { ifp->if_baudrate = 0; sc->link_active = 0; ifp->if_link_state = LINK_STATE_DOWN; if_link_state_change(ifp); } } } /********************************************************************* * * This routine disables all traffic on the adapter by issuing a * global reset on the MAC and deallocates TX/RX buffers. * **********************************************************************/ void ixgb_stop(void *arg) { struct ifnet *ifp; struct ixgb_softc *sc = arg; ifp = &sc->interface_data.ac_if; INIT_DEBUGOUT("ixgb_stop: begin\n"); ixgb_disable_intr(sc); sc->hw.adapter_stopped = FALSE; ixgb_adapter_stop(&sc->hw); timeout_del(&sc->timer_handle); /* Tell the stack that the interface is no longer active */ ifp->if_flags &= ~IFF_RUNNING; ifq_clr_oactive(&ifp->if_snd); ixgb_free_transmit_structures(sc); ixgb_free_receive_structures(sc); } /********************************************************************* * * Determine hardware revision. * **********************************************************************/ void ixgb_identify_hardware(struct ixgb_softc *sc) { u_int32_t reg; struct pci_attach_args *pa = &sc->osdep.ixgb_pa; /* Make sure our PCI config space has the necessary stuff set */ sc->hw.pci_cmd_word = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG); /* Save off the information about this board */ sc->hw.vendor_id = PCI_VENDOR(pa->pa_id); sc->hw.device_id = PCI_PRODUCT(pa->pa_id); reg = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_CLASS_REG); sc->hw.revision_id = PCI_REVISION(reg); reg = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_SUBSYS_ID_REG); sc->hw.subsystem_vendor_id = PCI_VENDOR(reg); sc->hw.subsystem_id = PCI_PRODUCT(reg); /* Set MacType, etc. 
based on this PCI info */ switch (sc->hw.device_id) { case IXGB_DEVICE_ID_82597EX: case IXGB_DEVICE_ID_82597EX_SR: case IXGB_DEVICE_ID_82597EX_LR: case IXGB_DEVICE_ID_82597EX_CX4: sc->hw.mac_type = ixgb_82597; break; default: INIT_DEBUGOUT1("Unknown device if 0x%x", sc->hw.device_id); printf("%s: unsupported device id 0x%x\n", sc->sc_dv.dv_xname, sc->hw.device_id); } } int ixgb_allocate_pci_resources(struct ixgb_softc *sc) { int val; pci_intr_handle_t ih; const char *intrstr = NULL; struct pci_attach_args *pa = &sc->osdep.ixgb_pa; pci_chipset_tag_t pc = pa->pa_pc; val = pci_conf_read(pa->pa_pc, pa->pa_tag, IXGB_MMBA); if (PCI_MAPREG_TYPE(val) != PCI_MAPREG_TYPE_MEM) { printf(": mmba is not mem space\n"); return (ENXIO); } if (pci_mapreg_map(pa, IXGB_MMBA, PCI_MAPREG_MEM_TYPE(val), 0, &sc->osdep.mem_bus_space_tag, &sc->osdep.mem_bus_space_handle, &sc->osdep.ixgb_membase, &sc->osdep.ixgb_memsize, 0)) { printf(": cannot find mem space\n"); return (ENXIO); } if (pci_intr_map(pa, &ih)) { printf(": couldn't map interrupt\n"); return (ENXIO); } sc->hw.back = &sc->osdep; intrstr = pci_intr_string(pc, ih); sc->sc_intrhand = pci_intr_establish(pc, ih, IPL_NET, ixgb_intr, sc, sc->sc_dv.dv_xname); if (sc->sc_intrhand == NULL) { printf(": couldn't establish interrupt"); if (intrstr != NULL) printf(" at %s", intrstr); printf("\n"); return (ENXIO); } printf(": %s", intrstr); return (0); } void ixgb_free_pci_resources(struct ixgb_softc *sc) { struct pci_attach_args *pa = &sc->osdep.ixgb_pa; pci_chipset_tag_t pc = pa->pa_pc; if (sc->sc_intrhand) pci_intr_disestablish(pc, sc->sc_intrhand); sc->sc_intrhand = 0; if (sc->osdep.ixgb_membase) bus_space_unmap(sc->osdep.mem_bus_space_tag, sc->osdep.mem_bus_space_handle, sc->osdep.ixgb_memsize); sc->osdep.ixgb_membase = 0; } /********************************************************************* * * Initialize the hardware to a configuration as specified by the * adapter structure. The controller is reset, the EEPROM is * verified, the MAC address is set, then the shared initialization * routines are called. * **********************************************************************/ int ixgb_hardware_init(struct ixgb_softc *sc) { /* Issue a global reset */ sc->hw.adapter_stopped = FALSE; ixgb_adapter_stop(&sc->hw); /* Make sure we have a good EEPROM before we read from it */ if (!ixgb_validate_eeprom_checksum(&sc->hw)) { printf("%s: The EEPROM Checksum Is Not Valid\n", sc->sc_dv.dv_xname); return (EIO); } if (!ixgb_init_hw(&sc->hw)) { printf("%s: Hardware Initialization Failed", sc->sc_dv.dv_xname); return (EIO); } bcopy(sc->hw.curr_mac_addr, sc->interface_data.ac_enaddr, IXGB_ETH_LENGTH_OF_ADDRESS); return (0); } /********************************************************************* * * Setup networking device structure and register an interface. 
* **********************************************************************/ void ixgb_setup_interface(struct ixgb_softc *sc) { struct ifnet *ifp; INIT_DEBUGOUT("ixgb_setup_interface: begin"); ifp = &sc->interface_data.ac_if; strlcpy(ifp->if_xname, sc->sc_dv.dv_xname, IFNAMSIZ); ifp->if_softc = sc; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = ixgb_ioctl; ifp->if_start = ixgb_start; ifp->if_watchdog = ixgb_watchdog; ifp->if_hardmtu = IXGB_MAX_JUMBO_FRAME_SIZE - ETHER_HDR_LEN - ETHER_CRC_LEN; ifq_set_maxlen(&ifp->if_snd, sc->num_tx_desc - 1); ifp->if_capabilities = IFCAP_VLAN_MTU; #if NVLAN > 0 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING; #endif #ifdef IXGB_CSUM_OFFLOAD ifp->if_capabilities |= IFCAP_CSUM_TCPv4|IFCAP_CSUM_UDPv4; #endif /* * Specify the media types supported by this adapter and register * callbacks to update media and link information */ ifmedia_init(&sc->media, IFM_IMASK, ixgb_media_change, ixgb_media_status); if ((sc->hw.phy_type == ixgb_phy_type_g6104) || (sc->hw.phy_type == ixgb_phy_type_txn17401)) { ifmedia_add(&sc->media, IFM_ETHER | IFM_10G_LR | IFM_FDX, 0, NULL); } else { ifmedia_add(&sc->media, IFM_ETHER | IFM_10G_SR | IFM_FDX, 0, NULL); } ifmedia_add(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO); if_attach(ifp); ether_ifattach(ifp); } /******************************************************************** * Manage DMA'able memory. *******************************************************************/ int ixgb_dma_malloc(struct ixgb_softc *sc, bus_size_t size, struct ixgb_dma_alloc * dma, int mapflags) { int r; dma->dma_tag = sc->osdep.ixgb_pa.pa_dmat; r = bus_dmamap_create(dma->dma_tag, size, 1, size, 0, BUS_DMA_NOWAIT, &dma->dma_map); if (r != 0) { printf("%s: ixgb_dma_malloc: bus_dmamap_create failed; " "error %u\n", sc->sc_dv.dv_xname, r); goto fail_0; } r = bus_dmamem_alloc(dma->dma_tag, size, PAGE_SIZE, 0, &dma->dma_seg, 1, &dma->dma_nseg, BUS_DMA_NOWAIT); if (r != 0) { printf("%s: ixgb_dma_malloc: bus_dmammem_alloc failed; " "size %lu, error %d\n", sc->sc_dv.dv_xname, (unsigned long)size, r); goto fail_1; } r = bus_dmamem_map(dma->dma_tag, &dma->dma_seg, dma->dma_nseg, size, &dma->dma_vaddr, BUS_DMA_NOWAIT); if (r != 0) { printf("%s: ixgb_dma_malloc: bus_dmammem_map failed; " "size %lu, error %d\n", sc->sc_dv.dv_xname, (unsigned long)size, r); goto fail_2; } r = bus_dmamap_load(sc->osdep.ixgb_pa.pa_dmat, dma->dma_map, dma->dma_vaddr, size, NULL, mapflags | BUS_DMA_NOWAIT); if (r != 0) { printf("%s: ixgb_dma_malloc: bus_dmamap_load failed; " "error %u\n", sc->sc_dv.dv_xname, r); goto fail_3; } dma->dma_size = size; return (0); fail_3: bus_dmamem_unmap(dma->dma_tag, dma->dma_vaddr, size); fail_2: bus_dmamem_free(dma->dma_tag, &dma->dma_seg, dma->dma_nseg); fail_1: bus_dmamap_destroy(dma->dma_tag, dma->dma_map); fail_0: dma->dma_map = NULL; dma->dma_tag = NULL; return (r); } void ixgb_dma_free(struct ixgb_softc *sc, struct ixgb_dma_alloc *dma) { if (dma->dma_tag == NULL) return; if (dma->dma_map != NULL) { bus_dmamap_sync(dma->dma_tag, dma->dma_map, 0, dma->dma_map->dm_mapsize, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dma->dma_tag, dma->dma_map); bus_dmamem_unmap(dma->dma_tag, dma->dma_vaddr, dma->dma_size); bus_dmamem_free(dma->dma_tag, &dma->dma_seg, dma->dma_nseg); bus_dmamap_destroy(dma->dma_tag, dma->dma_map); } } /********************************************************************* * * Allocate memory for tx_buffer structures. 
The tx_buffer stores all * the information needed to transmit a packet on the wire. * **********************************************************************/ int ixgb_allocate_transmit_structures(struct ixgb_softc *sc) { if (!(sc->tx_buffer_area = mallocarray(sc->num_tx_desc, sizeof(struct ixgb_buffer), M_DEVBUF, M_NOWAIT | M_ZERO))) { printf("%s: Unable to allocate tx_buffer memory\n", sc->sc_dv.dv_xname); return (ENOMEM); } return (0); } /********************************************************************* * * Allocate and initialize transmit structures. * **********************************************************************/ int ixgb_setup_transmit_structures(struct ixgb_softc *sc) { struct ixgb_buffer *tx_buffer; int error, i; if ((error = ixgb_allocate_transmit_structures(sc)) != 0) goto fail; bzero((void *)sc->tx_desc_base, (sizeof(struct ixgb_tx_desc)) * sc->num_tx_desc); sc->txtag = sc->osdep.ixgb_pa.pa_dmat; tx_buffer = sc->tx_buffer_area; for (i = 0; i < sc->num_tx_desc; i++) { error = bus_dmamap_create(sc->txtag, IXGB_MAX_JUMBO_FRAME_SIZE, IXGB_MAX_SCATTER, IXGB_MAX_JUMBO_FRAME_SIZE, 0, BUS_DMA_NOWAIT, &tx_buffer->map); if (error != 0) { printf("%s: Unable to create TX DMA map\n", sc->sc_dv.dv_xname); goto fail; } tx_buffer++; } sc->next_avail_tx_desc = 0; sc->oldest_used_tx_desc = 0; /* Set number of descriptors available */ sc->num_tx_desc_avail = sc->num_tx_desc; /* Set checksum context */ sc->active_checksum_context = OFFLOAD_NONE; bus_dmamap_sync(sc->txdma.dma_tag, sc->txdma.dma_map, 0, sc->txdma.dma_size, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return (0); fail: ixgb_free_transmit_structures(sc); return (error); } /********************************************************************* * * Enable transmit unit. * **********************************************************************/ void ixgb_initialize_transmit_unit(struct ixgb_softc *sc) { u_int32_t reg_tctl; u_int64_t bus_addr; /* Setup the Base and Length of the Tx Descriptor Ring */ bus_addr = sc->txdma.dma_map->dm_segs[0].ds_addr; IXGB_WRITE_REG(&sc->hw, TDBAL, (u_int32_t)bus_addr); IXGB_WRITE_REG(&sc->hw, TDBAH, (u_int32_t)(bus_addr >> 32)); IXGB_WRITE_REG(&sc->hw, TDLEN, sc->num_tx_desc * sizeof(struct ixgb_tx_desc)); /* Setup the HW Tx Head and Tail descriptor pointers */ IXGB_WRITE_REG(&sc->hw, TDH, 0); IXGB_WRITE_REG(&sc->hw, TDT, 0); HW_DEBUGOUT2("Base = %x, Length = %x\n", IXGB_READ_REG(&sc->hw, TDBAL), IXGB_READ_REG(&sc->hw, TDLEN)); IXGB_WRITE_REG(&sc->hw, TIDV, sc->tx_int_delay); /* Program the Transmit Control Register */ reg_tctl = IXGB_READ_REG(&sc->hw, TCTL); reg_tctl = IXGB_TCTL_TCE | IXGB_TCTL_TXEN | IXGB_TCTL_TPDE; IXGB_WRITE_REG(&sc->hw, TCTL, reg_tctl); /* Setup Transmit Descriptor Settings for this adapter */ sc->txd_cmd = IXGB_TX_DESC_TYPE | IXGB_TX_DESC_CMD_RS; if (sc->tx_int_delay > 0) sc->txd_cmd |= IXGB_TX_DESC_CMD_IDE; } /********************************************************************* * * Free all transmit related data structures. 
* **********************************************************************/ void ixgb_free_transmit_structures(struct ixgb_softc *sc) { struct ixgb_buffer *tx_buffer; int i; INIT_DEBUGOUT("free_transmit_structures: begin"); if (sc->tx_buffer_area != NULL) { tx_buffer = sc->tx_buffer_area; for (i = 0; i < sc->num_tx_desc; i++, tx_buffer++) { if (tx_buffer->map != NULL && tx_buffer->map->dm_nsegs > 0) { bus_dmamap_sync(sc->txtag, tx_buffer->map, 0, tx_buffer->map->dm_mapsize, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->txtag, tx_buffer->map); } if (tx_buffer->m_head != NULL) { m_freem(tx_buffer->m_head); tx_buffer->m_head = NULL; } if (tx_buffer->map != NULL) { bus_dmamap_destroy(sc->txtag, tx_buffer->map); tx_buffer->map = NULL; } } } if (sc->tx_buffer_area != NULL) { free(sc->tx_buffer_area, M_DEVBUF, 0); sc->tx_buffer_area = NULL; } if (sc->txtag != NULL) { sc->txtag = NULL; } } /********************************************************************* * * The offload context needs to be set when we transfer the first * packet of a particular protocol (TCP/UDP). We change the * context only if the protocol type changes. * **********************************************************************/ void ixgb_transmit_checksum_setup(struct ixgb_softc *sc, struct mbuf *mp, u_int8_t *txd_popts) { struct ixgb_context_desc *TXD; struct ixgb_buffer *tx_buffer; int curr_txd; if (mp->m_pkthdr.csum_flags) { if (mp->m_pkthdr.csum_flags & M_TCP_CSUM_OUT) { *txd_popts = IXGB_TX_DESC_POPTS_TXSM; if (sc->active_checksum_context == OFFLOAD_TCP_IP) return; else sc->active_checksum_context = OFFLOAD_TCP_IP; } else if (mp->m_pkthdr.csum_flags & M_UDP_CSUM_OUT) { *txd_popts = IXGB_TX_DESC_POPTS_TXSM; if (sc->active_checksum_context == OFFLOAD_UDP_IP) return; else sc->active_checksum_context = OFFLOAD_UDP_IP; } else { *txd_popts = 0; return; } } else { *txd_popts = 0; return; } /* * If we reach this point, the checksum offload context needs to be * reset. */ curr_txd = sc->next_avail_tx_desc; tx_buffer = &sc->tx_buffer_area[curr_txd]; TXD = (struct ixgb_context_desc *) & sc->tx_desc_base[curr_txd]; TXD->tucss = ENET_HEADER_SIZE + sizeof(struct ip); TXD->tucse = 0; TXD->mss = 0; if (sc->active_checksum_context == OFFLOAD_TCP_IP) { TXD->tucso = ENET_HEADER_SIZE + sizeof(struct ip) + offsetof(struct tcphdr, th_sum); } else if (sc->active_checksum_context == OFFLOAD_UDP_IP) { TXD->tucso = ENET_HEADER_SIZE + sizeof(struct ip) + offsetof(struct udphdr, uh_sum); } TXD->cmd_type_len = htole32(IXGB_CONTEXT_DESC_CMD_TCP | IXGB_TX_DESC_CMD_RS | IXGB_CONTEXT_DESC_CMD_IDE); tx_buffer->m_head = NULL; if (++curr_txd == sc->num_tx_desc) curr_txd = 0; sc->num_tx_desc_avail--; sc->next_avail_tx_desc = curr_txd; } /********************************************************************** * * Examine each tx_buffer in the used queue. If the hardware is done * processing the packet then free associated resources. The * tx_buffer is put back on the free queue. 
* **********************************************************************/ void ixgb_txeof(struct ixgb_softc *sc) { int i, num_avail; struct ixgb_buffer *tx_buffer; struct ixgb_tx_desc *tx_desc; struct ifnet *ifp = &sc->interface_data.ac_if; if (sc->num_tx_desc_avail == sc->num_tx_desc) return; num_avail = sc->num_tx_desc_avail; i = sc->oldest_used_tx_desc; tx_buffer = &sc->tx_buffer_area[i]; tx_desc = &sc->tx_desc_base[i]; bus_dmamap_sync(sc->txdma.dma_tag, sc->txdma.dma_map, 0, sc->txdma.dma_map->dm_mapsize, BUS_DMASYNC_POSTREAD); while (tx_desc->status & IXGB_TX_DESC_STATUS_DD) { tx_desc->status = 0; num_avail++; if (tx_buffer->m_head != NULL) { if (tx_buffer->map->dm_nsegs > 0) { bus_dmamap_sync(sc->txtag, tx_buffer->map, 0, tx_buffer->map->dm_mapsize, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->txtag, tx_buffer->map); } m_freem(tx_buffer->m_head); tx_buffer->m_head = NULL; } if (++i == sc->num_tx_desc) i = 0; tx_buffer = &sc->tx_buffer_area[i]; tx_desc = &sc->tx_desc_base[i]; } bus_dmamap_sync(sc->txdma.dma_tag, sc->txdma.dma_map, 0, sc->txdma.dma_map->dm_mapsize, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); sc->oldest_used_tx_desc = i; /* * If we have enough room, clear IFF_OACTIVE to tell the stack that * it is OK to send packets. If there are no pending descriptors, * clear the timeout. Otherwise, if some descriptors have been freed, * restart the timeout. */ if (num_avail > IXGB_TX_CLEANUP_THRESHOLD) ifq_clr_oactive(&ifp->if_snd); /* All clean, turn off the timer */ if (num_avail == sc->num_tx_desc) ifp->if_timer = 0; /* Some cleaned, reset the timer */ else if (num_avail != sc->num_tx_desc_avail) ifp->if_timer = IXGB_TX_TIMEOUT; sc->num_tx_desc_avail = num_avail; } /********************************************************************* * * Get a buffer from system mbuf buffer pool. * **********************************************************************/ int ixgb_get_buf(struct ixgb_softc *sc, int i, struct mbuf *nmp) { struct mbuf *mp = nmp; struct ixgb_buffer *rx_buffer; int error; if (mp == NULL) { MGETHDR(mp, M_DONTWAIT, MT_DATA); if (mp == NULL) { sc->mbuf_alloc_failed++; return (ENOBUFS); } MCLGET(mp, M_DONTWAIT); if ((mp->m_flags & M_EXT) == 0) { m_freem(mp); sc->mbuf_cluster_failed++; return (ENOBUFS); } mp->m_len = mp->m_pkthdr.len = MCLBYTES; } else { mp->m_len = mp->m_pkthdr.len = MCLBYTES; mp->m_data = mp->m_ext.ext_buf; mp->m_next = NULL; } if (sc->hw.max_frame_size <= (MCLBYTES - ETHER_ALIGN)) m_adj(mp, ETHER_ALIGN); rx_buffer = &sc->rx_buffer_area[i]; /* * Using memory from the mbuf cluster pool, invoke the bus_dma * machinery to arrange the memory mapping. */ error = bus_dmamap_load_mbuf(sc->rxtag, rx_buffer->map, mp, BUS_DMA_NOWAIT); if (error) { m_freem(mp); return (error); } rx_buffer->m_head = mp; bzero(&sc->rx_desc_base[i], sizeof(sc->rx_desc_base[i])); sc->rx_desc_base[i].buff_addr = htole64(rx_buffer->map->dm_segs[0].ds_addr); bus_dmamap_sync(sc->rxtag, rx_buffer->map, 0, rx_buffer->map->dm_mapsize, BUS_DMASYNC_PREREAD); return (0); } /********************************************************************* * * Allocate memory for rx_buffer structures. Since we use one * rx_buffer per received packet, the maximum number of rx_buffer's * that we'll need is equal to the number of receive descriptors * that we've allocated. 
* **********************************************************************/ int ixgb_allocate_receive_structures(struct ixgb_softc *sc) { int i, error; struct ixgb_buffer *rx_buffer; if (!(sc->rx_buffer_area = mallocarray(sc->num_rx_desc, sizeof(struct ixgb_buffer), M_DEVBUF, M_NOWAIT | M_ZERO))) { printf("%s: Unable to allocate rx_buffer memory\n", sc->sc_dv.dv_xname); return (ENOMEM); } sc->rxtag = sc->osdep.ixgb_pa.pa_dmat; rx_buffer = sc->rx_buffer_area; for (i = 0; i < sc->num_rx_desc; i++, rx_buffer++) { error = bus_dmamap_create(sc->rxtag, MCLBYTES, 1, MCLBYTES, 0, BUS_DMA_NOWAIT, &rx_buffer->map); if (error != 0) { printf("%s: ixgb_allocate_receive_structures: " "bus_dmamap_create failed; error %u\n", sc->sc_dv.dv_xname, error); goto fail; } } for (i = 0; i < sc->num_rx_desc; i++) { error = ixgb_get_buf(sc, i, NULL); if (error != 0) goto fail; } bus_dmamap_sync(sc->rxdma.dma_tag, sc->rxdma.dma_map, 0, sc->rxdma.dma_map->dm_mapsize, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return (0); fail: ixgb_free_receive_structures(sc); return (error); } /********************************************************************* * * Allocate and initialize receive structures. * **********************************************************************/ int ixgb_setup_receive_structures(struct ixgb_softc *sc) { bzero((void *)sc->rx_desc_base, (sizeof(struct ixgb_rx_desc)) * sc->num_rx_desc); if (ixgb_allocate_receive_structures(sc)) return (ENOMEM); /* Setup our descriptor pointers */ sc->next_rx_desc_to_check = 0; sc->next_rx_desc_to_use = 0; return (0); } /********************************************************************* * * Enable receive unit. * **********************************************************************/ void ixgb_initialize_receive_unit(struct ixgb_softc *sc) { u_int32_t reg_rctl; u_int32_t reg_rxcsum; u_int32_t reg_rxdctl; u_int64_t bus_addr; /* * Make sure receives are disabled while setting up the descriptor * ring */ reg_rctl = IXGB_READ_REG(&sc->hw, RCTL); IXGB_WRITE_REG(&sc->hw, RCTL, reg_rctl & ~IXGB_RCTL_RXEN); /* Set the Receive Delay Timer Register */ IXGB_WRITE_REG(&sc->hw, RDTR, sc->rx_int_delay); /* Setup the Base and Length of the Rx Descriptor Ring */ bus_addr = sc->rxdma.dma_map->dm_segs[0].ds_addr; IXGB_WRITE_REG(&sc->hw, RDBAL, (u_int32_t)bus_addr); IXGB_WRITE_REG(&sc->hw, RDBAH, (u_int32_t)(bus_addr >> 32)); IXGB_WRITE_REG(&sc->hw, RDLEN, sc->num_rx_desc * sizeof(struct ixgb_rx_desc)); /* Setup the HW Rx Head and Tail Descriptor Pointers */ IXGB_WRITE_REG(&sc->hw, RDH, 0); IXGB_WRITE_REG(&sc->hw, RDT, sc->num_rx_desc - 1); reg_rxdctl = RXDCTL_WTHRESH_DEFAULT << IXGB_RXDCTL_WTHRESH_SHIFT | RXDCTL_HTHRESH_DEFAULT << IXGB_RXDCTL_HTHRESH_SHIFT | RXDCTL_PTHRESH_DEFAULT << IXGB_RXDCTL_PTHRESH_SHIFT; IXGB_WRITE_REG(&sc->hw, RXDCTL, reg_rxdctl); sc->raidc = 1; if (sc->raidc) { uint32_t raidc; uint8_t poll_threshold; #define IXGB_RAIDC_POLL_DEFAULT 120 poll_threshold = ((sc->num_rx_desc - 1) >> 3); poll_threshold >>= 1; poll_threshold &= 0x3F; raidc = IXGB_RAIDC_EN | IXGB_RAIDC_RXT_GATE | (IXGB_RAIDC_POLL_DEFAULT << IXGB_RAIDC_POLL_SHIFT) | (sc->rx_int_delay << IXGB_RAIDC_DELAY_SHIFT) | poll_threshold; IXGB_WRITE_REG(&sc->hw, RAIDC, raidc); } /* Enable Receive Checksum Offload for TCP and UDP ? 
*/ reg_rxcsum = IXGB_READ_REG(&sc->hw, RXCSUM); reg_rxcsum |= IXGB_RXCSUM_TUOFL; IXGB_WRITE_REG(&sc->hw, RXCSUM, reg_rxcsum); /* Setup the Receive Control Register */ reg_rctl = IXGB_READ_REG(&sc->hw, RCTL); reg_rctl &= ~(3 << IXGB_RCTL_MO_SHIFT); reg_rctl |= IXGB_RCTL_BAM | IXGB_RCTL_RDMTS_1_2 | IXGB_RCTL_SECRC | IXGB_RCTL_CFF | (sc->hw.mc_filter_type << IXGB_RCTL_MO_SHIFT); switch (sc->rx_buffer_len) { default: case IXGB_RXBUFFER_2048: reg_rctl |= IXGB_RCTL_BSIZE_2048; break; case IXGB_RXBUFFER_4096: reg_rctl |= IXGB_RCTL_BSIZE_4096; break; case IXGB_RXBUFFER_8192: reg_rctl |= IXGB_RCTL_BSIZE_8192; break; case IXGB_RXBUFFER_16384: reg_rctl |= IXGB_RCTL_BSIZE_16384; break; } reg_rctl |= IXGB_RCTL_RXEN; /* Enable Receives */ IXGB_WRITE_REG(&sc->hw, RCTL, reg_rctl); } /********************************************************************* * * Free receive related data structures. * **********************************************************************/ void ixgb_free_receive_structures(struct ixgb_softc *sc) { struct ixgb_buffer *rx_buffer; int i; INIT_DEBUGOUT("free_receive_structures: begin"); if (sc->rx_buffer_area != NULL) { rx_buffer = sc->rx_buffer_area; for (i = 0; i < sc->num_rx_desc; i++, rx_buffer++) { if (rx_buffer->map != NULL && rx_buffer->map->dm_nsegs > 0) { bus_dmamap_sync(sc->rxtag, rx_buffer->map, 0, rx_buffer->map->dm_mapsize, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->rxtag, rx_buffer->map); } if (rx_buffer->m_head != NULL) { m_freem(rx_buffer->m_head); rx_buffer->m_head = NULL; } if (rx_buffer->map != NULL) { bus_dmamap_destroy(sc->rxtag, rx_buffer->map); rx_buffer->map = NULL; } } } if (sc->rx_buffer_area != NULL) { free(sc->rx_buffer_area, M_DEVBUF, 0); sc->rx_buffer_area = NULL; } if (sc->rxtag != NULL) sc->rxtag = NULL; } /********************************************************************* * * This routine executes in interrupt context. It replenishes * the mbufs in the descriptor and sends data which has been * dma'ed into host memory to upper layer. * * We loop at most count times if count is > 0, or until done if * count < 0. * *********************************************************************/ void ixgb_rxeof(struct ixgb_softc *sc, int count) { struct ifnet *ifp; struct mbuf_list ml = MBUF_LIST_INITIALIZER(); struct mbuf *mp; int eop = 0; int len; u_int8_t accept_frame = 0; int i; int next_to_use = 0; int eop_desc; /* Pointer to the receive descriptor being examined. 
*/ struct ixgb_rx_desc *current_desc; ifp = &sc->interface_data.ac_if; i = sc->next_rx_desc_to_check; next_to_use = sc->next_rx_desc_to_use; eop_desc = sc->next_rx_desc_to_check; current_desc = &sc->rx_desc_base[i]; bus_dmamap_sync(sc->rxdma.dma_tag, sc->rxdma.dma_map, 0, sc->rxdma.dma_map->dm_mapsize, BUS_DMASYNC_POSTREAD); if (!((current_desc->status) & IXGB_RX_DESC_STATUS_DD)) return; while ((current_desc->status & IXGB_RX_DESC_STATUS_DD) && (count != 0) && (ifp->if_flags & IFF_RUNNING)) { mp = sc->rx_buffer_area[i].m_head; bus_dmamap_sync(sc->rxtag, sc->rx_buffer_area[i].map, 0, sc->rx_buffer_area[i].map->dm_mapsize, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->rxtag, sc->rx_buffer_area[i].map); accept_frame = 1; if (current_desc->status & IXGB_RX_DESC_STATUS_EOP) { count--; eop = 1; } else { eop = 0; } len = letoh16(current_desc->length); if (current_desc->errors & (IXGB_RX_DESC_ERRORS_CE | IXGB_RX_DESC_ERRORS_SE | IXGB_RX_DESC_ERRORS_P | IXGB_RX_DESC_ERRORS_RXE)) accept_frame = 0; if (accept_frame) { /* Assign correct length to the current fragment */ mp->m_len = len; if (sc->fmp == NULL) { mp->m_pkthdr.len = len; sc->fmp = mp; /* Store the first mbuf */ sc->lmp = mp; } else { /* Chain mbuf's together */ mp->m_flags &= ~M_PKTHDR; sc->lmp->m_next = mp; sc->lmp = sc->lmp->m_next; sc->fmp->m_pkthdr.len += len; } if (eop) { eop_desc = i; ixgb_receive_checksum(sc, current_desc, sc->fmp); #if NVLAN > 0 if (current_desc->status & IXGB_RX_DESC_STATUS_VP) { sc->fmp->m_pkthdr.ether_vtag = letoh16(current_desc->special); sc->fmp->m_flags |= M_VLANTAG; } #endif ml_enqueue(&ml, sc->fmp); sc->fmp = NULL; sc->lmp = NULL; } sc->rx_buffer_area[i].m_head = NULL; } else { sc->dropped_pkts++; m_freem(sc->fmp); sc->fmp = NULL; sc->lmp = NULL; } /* Zero out the receive descriptors status */ current_desc->status = 0; bus_dmamap_sync(sc->rxdma.dma_tag, sc->rxdma.dma_map, 0, sc->rxdma.dma_map->dm_mapsize, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* Advance our pointers to the next descriptor */ if (++i == sc->num_rx_desc) { i = 0; current_desc = sc->rx_desc_base; } else current_desc++; } sc->next_rx_desc_to_check = i; if (--i < 0) i = (sc->num_rx_desc - 1); /* * 82597EX: Workaround for redundent write back in receive descriptor ring (causes * memory corruption). Avoid using and re-submitting the most recently received RX * descriptor back to hardware. * * if(Last written back descriptor == EOP bit set descriptor) * then avoid re-submitting the most recently received RX descriptor * back to hardware. * if(Last written back descriptor != EOP bit set descriptor) * then avoid re-submitting the most recently received RX descriptors * till last EOP bit set descriptor. 
*/ if (eop_desc != i) { if (++eop_desc == sc->num_rx_desc) eop_desc = 0; i = eop_desc; } /* Replenish the descriptors with new mbufs till last EOP bit set descriptor */ while (next_to_use != i) { current_desc = &sc->rx_desc_base[next_to_use]; if ((current_desc->errors & (IXGB_RX_DESC_ERRORS_CE | IXGB_RX_DESC_ERRORS_SE | IXGB_RX_DESC_ERRORS_P | IXGB_RX_DESC_ERRORS_RXE))) { mp = sc->rx_buffer_area[next_to_use].m_head; ixgb_get_buf(sc, next_to_use, mp); } else { if (ixgb_get_buf(sc, next_to_use, NULL) == ENOBUFS) break; } /* Advance our pointers to the next descriptor */ if (++next_to_use == sc->num_rx_desc) next_to_use = 0; } sc->next_rx_desc_to_use = next_to_use; if (--next_to_use < 0) next_to_use = (sc->num_rx_desc - 1); /* Advance the IXGB's Receive Queue #0 "Tail Pointer" */ IXGB_WRITE_REG(&sc->hw, RDT, next_to_use); if_input(ifp, &ml); } /********************************************************************* * * Verify that the hardware indicated that the checksum is valid. * Inform the stack about the status of checksum so that stack * doesn't spend time verifying the checksum. * *********************************************************************/ void ixgb_receive_checksum(struct ixgb_softc *sc, struct ixgb_rx_desc *rx_desc, struct mbuf *mp) { if (rx_desc->status & IXGB_RX_DESC_STATUS_IXSM) { mp->m_pkthdr.csum_flags = 0; return; } if (rx_desc->status & IXGB_RX_DESC_STATUS_IPCS) { /* Did it pass? */ if (!(rx_desc->errors & IXGB_RX_DESC_ERRORS_IPE)) { /* IP Checksum Good */ mp->m_pkthdr.csum_flags = M_IPV4_CSUM_IN_OK; } else { mp->m_pkthdr.csum_flags = 0; } } if (rx_desc->status & IXGB_RX_DESC_STATUS_TCPCS) { /* Did it pass? */ if (!(rx_desc->errors & IXGB_RX_DESC_ERRORS_TCPE)) { mp->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_OK | M_UDP_CSUM_IN_OK; } } } /* * This turns on the hardware offload of the VLAN * tag insertion and strip */ void ixgb_enable_hw_vlans(struct ixgb_softc *sc) { uint32_t ctrl; ctrl = IXGB_READ_REG(&sc->hw, CTRL0); ctrl |= IXGB_CTRL0_VME; IXGB_WRITE_REG(&sc->hw, CTRL0, ctrl); } void ixgb_enable_intr(struct ixgb_softc *sc) { uint32_t val; val = IXGB_INT_RXT0 | IXGB_INT_TXDW | IXGB_INT_RXDMT0 | IXGB_INT_LSC | IXGB_INT_RXO; if (sc->hw.subsystem_vendor_id == SUN_SUBVENDOR_ID) val |= IXGB_INT_GPI0; IXGB_WRITE_REG(&sc->hw, IMS, val); } void ixgb_disable_intr(struct ixgb_softc *sc) { IXGB_WRITE_REG(&sc->hw, IMC, ~0); } void ixgb_write_pci_cfg(struct ixgb_hw *hw, uint32_t reg, uint16_t *value) { struct pci_attach_args *pa = &((struct ixgb_osdep *)hw->back)->ixgb_pa; pci_chipset_tag_t pc = pa->pa_pc; /* Should we do read/mask/write...? 16 vs 32 bit!!! */ pci_conf_write(pc, pa->pa_tag, reg, *value); } /********************************************************************** * * Update the board statistics counters. 
* **********************************************************************/ void ixgb_update_stats_counters(struct ixgb_softc *sc) { struct ifnet *ifp; sc->stats.crcerrs += IXGB_READ_REG(&sc->hw, CRCERRS); sc->stats.gprcl += IXGB_READ_REG(&sc->hw, GPRCL); sc->stats.gprch += IXGB_READ_REG(&sc->hw, GPRCH); sc->stats.gorcl += IXGB_READ_REG(&sc->hw, GORCL); sc->stats.gorch += IXGB_READ_REG(&sc->hw, GORCH); sc->stats.bprcl += IXGB_READ_REG(&sc->hw, BPRCL); sc->stats.bprch += IXGB_READ_REG(&sc->hw, BPRCH); sc->stats.mprcl += IXGB_READ_REG(&sc->hw, MPRCL); sc->stats.mprch += IXGB_READ_REG(&sc->hw, MPRCH); sc->stats.roc += IXGB_READ_REG(&sc->hw, ROC); sc->stats.mpc += IXGB_READ_REG(&sc->hw, MPC); sc->stats.dc += IXGB_READ_REG(&sc->hw, DC); sc->stats.rlec += IXGB_READ_REG(&sc->hw, RLEC); sc->stats.xonrxc += IXGB_READ_REG(&sc->hw, XONRXC); sc->stats.xontxc += IXGB_READ_REG(&sc->hw, XONTXC); sc->stats.xoffrxc += IXGB_READ_REG(&sc->hw, XOFFRXC); sc->stats.xofftxc += IXGB_READ_REG(&sc->hw, XOFFTXC); sc->stats.gptcl += IXGB_READ_REG(&sc->hw, GPTCL); sc->stats.gptch += IXGB_READ_REG(&sc->hw, GPTCH); sc->stats.gotcl += IXGB_READ_REG(&sc->hw, GOTCL); sc->stats.gotch += IXGB_READ_REG(&sc->hw, GOTCH); sc->stats.ruc += IXGB_READ_REG(&sc->hw, RUC); sc->stats.rfc += IXGB_READ_REG(&sc->hw, RFC); sc->stats.rjc += IXGB_READ_REG(&sc->hw, RJC); sc->stats.torl += IXGB_READ_REG(&sc->hw, TORL); sc->stats.torh += IXGB_READ_REG(&sc->hw, TORH); sc->stats.totl += IXGB_READ_REG(&sc->hw, TOTL); sc->stats.toth += IXGB_READ_REG(&sc->hw, TOTH); sc->stats.tprl += IXGB_READ_REG(&sc->hw, TPRL); sc->stats.tprh += IXGB_READ_REG(&sc->hw, TPRH); sc->stats.tptl += IXGB_READ_REG(&sc->hw, TPTL); sc->stats.tpth += IXGB_READ_REG(&sc->hw, TPTH); sc->stats.plt64c += IXGB_READ_REG(&sc->hw, PLT64C); sc->stats.mptcl += IXGB_READ_REG(&sc->hw, MPTCL); sc->stats.mptch += IXGB_READ_REG(&sc->hw, MPTCH); sc->stats.bptcl += IXGB_READ_REG(&sc->hw, BPTCL); sc->stats.bptch += IXGB_READ_REG(&sc->hw, BPTCH); sc->stats.uprcl += IXGB_READ_REG(&sc->hw, UPRCL); sc->stats.uprch += IXGB_READ_REG(&sc->hw, UPRCH); sc->stats.vprcl += IXGB_READ_REG(&sc->hw, VPRCL); sc->stats.vprch += IXGB_READ_REG(&sc->hw, VPRCH); sc->stats.jprcl += IXGB_READ_REG(&sc->hw, JPRCL); sc->stats.jprch += IXGB_READ_REG(&sc->hw, JPRCH); sc->stats.rnbc += IXGB_READ_REG(&sc->hw, RNBC); sc->stats.icbc += IXGB_READ_REG(&sc->hw, ICBC); sc->stats.ecbc += IXGB_READ_REG(&sc->hw, ECBC); sc->stats.uptcl += IXGB_READ_REG(&sc->hw, UPTCL); sc->stats.uptch += IXGB_READ_REG(&sc->hw, UPTCH); sc->stats.vptcl += IXGB_READ_REG(&sc->hw, VPTCL); sc->stats.vptch += IXGB_READ_REG(&sc->hw, VPTCH); sc->stats.jptcl += IXGB_READ_REG(&sc->hw, JPTCL); sc->stats.jptch += IXGB_READ_REG(&sc->hw, JPTCH); sc->stats.tsctc += IXGB_READ_REG(&sc->hw, TSCTC); sc->stats.tsctfc += IXGB_READ_REG(&sc->hw, TSCTFC); sc->stats.ibic += IXGB_READ_REG(&sc->hw, IBIC); sc->stats.lfc += IXGB_READ_REG(&sc->hw, LFC); sc->stats.pfrc += IXGB_READ_REG(&sc->hw, PFRC); sc->stats.pftc += IXGB_READ_REG(&sc->hw, PFTC); sc->stats.mcfrc += IXGB_READ_REG(&sc->hw, MCFRC); ifp = &sc->interface_data.ac_if; /* Fill out the OS statistics structure */ ifp->if_collisions = 0; /* Rx Errors */ ifp->if_ierrors = sc->dropped_pkts + sc->stats.crcerrs + sc->stats.rnbc + sc->stats.mpc + sc->stats.rlec; /* Tx Errors */ ifp->if_oerrors = sc->watchdog_events; } #ifdef IXGB_DEBUG /********************************************************************** * * This routine is called only when ixgb_display_debug_stats is enabled. 
* This routine provides a way to take a look at important statistics * maintained by the driver and hardware. * **********************************************************************/ void ixgb_print_hw_stats(struct ixgb_softc *sc) { char buf_speed[100], buf_type[100]; ixgb_bus_speed bus_speed; ixgb_bus_type bus_type; const char * const unit = sc->sc_dv.dv_xname; bus_speed = sc->hw.bus.speed; bus_type = sc->hw.bus.type; snprintf(buf_speed, sizeof(buf_speed), bus_speed == ixgb_bus_speed_33 ? "33MHz" : bus_speed == ixgb_bus_speed_66 ? "66MHz" : bus_speed == ixgb_bus_speed_100 ? "100MHz" : bus_speed == ixgb_bus_speed_133 ? "133MHz" : "UNKNOWN"); printf("%s: PCI_Bus_Speed = %s\n", unit, buf_speed); snprintf(buf_type, sizeof(buf_type), bus_type == ixgb_bus_type_pci ? "PCI" : bus_type == ixgb_bus_type_pcix ? "PCI-X" : "UNKNOWN"); printf("%s: PCI_Bus_Type = %s\n", unit, buf_type); printf("%s: Tx Descriptors not Avail1 = %ld\n", unit, sc->no_tx_desc_avail1); printf("%s: Tx Descriptors not Avail2 = %ld\n", unit, sc->no_tx_desc_avail2); printf("%s: Std Mbuf Failed = %ld\n", unit, sc->mbuf_alloc_failed); printf("%s: Std Cluster Failed = %ld\n", unit, sc->mbuf_cluster_failed); printf("%s: Defer count = %lld\n", unit, (long long)sc->stats.dc); printf("%s: Missed Packets = %lld\n", unit, (long long)sc->stats.mpc); printf("%s: Receive No Buffers = %lld\n", unit, (long long)sc->stats.rnbc); printf("%s: Receive length errors = %lld\n", unit, (long long)sc->stats.rlec); printf("%s: Crc errors = %lld\n", unit, (long long)sc->stats.crcerrs); printf("%s: Driver dropped packets = %ld\n", unit, sc->dropped_pkts); printf("%s: XON Rcvd = %lld\n", unit, (long long)sc->stats.xonrxc); printf("%s: XON Xmtd = %lld\n", unit, (long long)sc->stats.xontxc); printf("%s: XOFF Rcvd = %lld\n", unit, (long long)sc->stats.xoffrxc); printf("%s: XOFF Xmtd = %lld\n", unit, (long long)sc->stats.xofftxc); printf("%s: Good Packets Rcvd = %lld\n", unit, (long long)sc->stats.gprcl); printf("%s: Good Packets Xmtd = %lld\n", unit, (long long)sc->stats.gptcl); printf("%s: Jumbo frames recvd = %lld\n", unit, (long long)sc->stats.jprcl); printf("%s: Jumbo frames Xmtd = %lld\n", unit, (long long)sc->stats.jptcl); } #endif
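/*
 * Illustration only; not part of if_ixgb.c above.  A minimal sketch of the
 * descriptor-ring index arithmetic the driver uses throughout (see
 * ixgb_encap(), ixgb_txeof() and ixgb_rxeof()): an index advances by one
 * and wraps back to slot zero when it reaches the ring size.  The function
 * name is hypothetical.
 */
static inline int
example_ring_advance(int index, int ring_size)
{
	if (++index == ring_size)
		index = 0;
	return (index);
}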
/* $OpenBSD: uipc_socket.c,v 1.256 2021/02/24 13:19:48 bluhm Exp $ */ /* $NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $ */ /* * Copyright (c) 1982, 1986, 1988, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94 */ #include <sys/param.h> #include <sys/systm.h> #include <sys/proc.h> #include <sys/file.h> #include <sys/filedesc.h> #include <sys/malloc.h> #include <sys/mbuf.h> #include <sys/domain.h> #include <sys/kernel.h> #include <sys/event.h> #include <sys/protosw.h> #include <sys/socket.h> #include <sys/unpcb.h> #include <sys/socketvar.h> #include <sys/signalvar.h> #include <net/if.h> #include <sys/pool.h> #include <sys/atomic.h> #include <sys/rwlock.h> #include <sys/time.h> #ifdef DDB #include <machine/db_machdep.h> #endif void sbsync(struct sockbuf *, struct mbuf *); int sosplice(struct socket *, int, off_t, struct timeval *); void sounsplice(struct socket *, struct socket *, int); void soidle(void *); void sotask(void *); void soreaper(void *); void soput(void *); int somove(struct socket *, int); void sorflush(struct socket *); void filt_sordetach(struct knote *kn); int filt_soread(struct knote *kn, long hint); void filt_sowdetach(struct knote *kn); int filt_sowrite(struct knote *kn, long hint); int filt_solisten(struct knote *kn, long hint); const struct filterops solisten_filtops = { .f_flags = FILTEROP_ISFD, .f_attach = NULL, .f_detach = filt_sordetach, .f_event = filt_solisten, }; const struct filterops soread_filtops = { .f_flags = FILTEROP_ISFD, .f_attach = NULL, .f_detach = filt_sordetach, .f_event = filt_soread, }; const struct filterops sowrite_filtops = { .f_flags = FILTEROP_ISFD, .f_attach = NULL, .f_detach = filt_sowdetach, .f_event = filt_sowrite, }; const struct filterops soexcept_filtops = { .f_flags = FILTEROP_ISFD, .f_attach = NULL, .f_detach = filt_sordetach, .f_event = filt_soread, }; #ifndef SOMINCONN #define SOMINCONN 80 #endif /* SOMINCONN */ int somaxconn = SOMAXCONN; int sominconn = SOMINCONN; struct pool socket_pool; #ifdef SOCKET_SPLICE struct pool sosplice_pool; struct taskq *sosplice_taskq; struct rwlock sosplice_lock = RWLOCK_INITIALIZER("sosplicelk"); #endif void soinit(void) { pool_init(&socket_pool, sizeof(struct socket), 0, IPL_SOFTNET, 0, "sockpl", NULL); #ifdef SOCKET_SPLICE pool_init(&sosplice_pool, sizeof(struct sosplice), 0, IPL_SOFTNET, 0, "sosppl", NULL); #endif } /* * Socket operation routines. * These routines are called by the routines in * sys_socket.c or from a system process, and * implement the semantics of socket operations by * switching out to the protocol specific routines. */ int socreate(int dom, struct socket **aso, int type, int proto) { struct proc *p = curproc; /* XXX */ const struct protosw *prp; struct socket *so; int error, s; if (proto) prp = pffindproto(dom, proto, type); else prp = pffindtype(dom, type); if (prp == NULL || prp->pr_attach == NULL) return (EPROTONOSUPPORT); if (prp->pr_type != type) return (EPROTOTYPE); so = pool_get(&socket_pool, PR_WAITOK | PR_ZERO); sigio_init(&so->so_sigio); TAILQ_INIT(&so->so_q0); TAILQ_INIT(&so->so_q); so->so_type = type; if (suser(p) == 0) so->so_state = SS_PRIV; so->so_ruid = p->p_ucred->cr_ruid; so->so_euid = p->p_ucred->cr_uid; so->so_rgid = p->p_ucred->cr_rgid; so->so_egid = p->p_ucred->cr_gid; so->so_cpid = p->p_p->ps_pid; so->so_proto = prp; so->so_snd.sb_timeo_nsecs = INFSLP; so->so_rcv.sb_timeo_nsecs = INFSLP; s = solock(so); error = (*prp->pr_attach)(so, proto); if (error) { so->so_state |= SS_NOFDREF; /* sofree() calls sounlock(). 
*/ sofree(so, s); return (error); } sounlock(so, s); *aso = so; return (0); } int sobind(struct socket *so, struct mbuf *nam, struct proc *p) { int error; soassertlocked(so); error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, NULL, nam, NULL, p); return (error); } int solisten(struct socket *so, int backlog) { int error; soassertlocked(so); if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING)) return (EINVAL); #ifdef SOCKET_SPLICE if (isspliced(so) || issplicedback(so)) return (EOPNOTSUPP); #endif /* SOCKET_SPLICE */ error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, NULL, NULL, NULL, curproc); if (error) return (error); if (TAILQ_FIRST(&so->so_q) == NULL) so->so_options |= SO_ACCEPTCONN; if (backlog < 0 || backlog > somaxconn) backlog = somaxconn; if (backlog < sominconn) backlog = sominconn; so->so_qlimit = backlog; return (0); } #define SOSP_FREEING_READ 1 #define SOSP_FREEING_WRITE 2 void sofree(struct socket *so, int s) { soassertlocked(so); if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) { sounlock(so, s); return; } if (so->so_head) { /* * We must not decommission a socket that's on the accept(2) * queue. If we do, then accept(2) may hang after select(2) * indicated that the listening socket was ready. */ if (!soqremque(so, 0)) { sounlock(so, s); return; } } sigio_free(&so->so_sigio); #ifdef SOCKET_SPLICE if (so->so_sp) { if (issplicedback(so)) { int freeing = SOSP_FREEING_WRITE; if (so->so_sp->ssp_soback == so) freeing |= SOSP_FREEING_READ; sounsplice(so->so_sp->ssp_soback, so, freeing); } if (isspliced(so)) { int freeing = SOSP_FREEING_READ; if (so == so->so_sp->ssp_socket) freeing |= SOSP_FREEING_WRITE; sounsplice(so, so->so_sp->ssp_socket, freeing); } } #endif /* SOCKET_SPLICE */ sbrelease(so, &so->so_snd); sorflush(so); sounlock(so, s); #ifdef SOCKET_SPLICE if (so->so_sp) { /* Reuse splice idle, sounsplice() has been called before. */ timeout_set_proc(&so->so_sp->ssp_idleto, soreaper, so); timeout_add(&so->so_sp->ssp_idleto, 0); } else #endif /* SOCKET_SPLICE */ { pool_put(&socket_pool, so); } } static inline uint64_t solinger_nsec(struct socket *so) { if (so->so_linger == 0) return INFSLP; return SEC_TO_NSEC(so->so_linger); } /* * Close a socket on last file table reference removal. * Initiate disconnect if connected. * Free socket when disconnect complete. */ int soclose(struct socket *so, int flags) { struct socket *so2; int s, error = 0; s = solock(so); /* Revoke async IO early. There is a final revocation in sofree(). */ sigio_free(&so->so_sigio); if (so->so_options & SO_ACCEPTCONN) { while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) { (void) soqremque(so2, 0); (void) soabort(so2); } while ((so2 = TAILQ_FIRST(&so->so_q)) != NULL) { (void) soqremque(so2, 1); (void) soabort(so2); } } if (so->so_pcb == NULL) goto discard; if (so->so_state & SS_ISCONNECTED) { if ((so->so_state & SS_ISDISCONNECTING) == 0) { error = sodisconnect(so); if (error) goto drop; } if (so->so_options & SO_LINGER) { if ((so->so_state & SS_ISDISCONNECTING) && (flags & MSG_DONTWAIT)) goto drop; while (so->so_state & SS_ISCONNECTED) { error = sosleep_nsec(so, &so->so_timeo, PSOCK | PCATCH, "netcls", solinger_nsec(so)); if (error) break; } } } drop: if (so->so_pcb) { int error2; KASSERT(so->so_proto->pr_detach); error2 = (*so->so_proto->pr_detach)(so); if (error == 0) error = error2; } discard: if (so->so_state & SS_NOFDREF) panic("soclose NOFDREF: so %p, so_type %d", so, so->so_type); so->so_state |= SS_NOFDREF; /* sofree() calls sounlock(). 
*/ sofree(so, s); return (error); } int soabort(struct socket *so) { soassertlocked(so); return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, NULL, NULL, NULL, curproc); } int soaccept(struct socket *so, struct mbuf *nam) { int error = 0; soassertlocked(so); if ((so->so_state & SS_NOFDREF) == 0) panic("soaccept !NOFDREF: so %p, so_type %d", so, so->so_type); so->so_state &= ~SS_NOFDREF; if ((so->so_state & SS_ISDISCONNECTED) == 0 || (so->so_proto->pr_flags & PR_ABRTACPTDIS) == 0) error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT, NULL, nam, NULL, curproc); else error = ECONNABORTED; return (error); } int soconnect(struct socket *so, struct mbuf *nam) { int error; soassertlocked(so); if (so->so_options & SO_ACCEPTCONN) return (EOPNOTSUPP); /* * If protocol is connection-based, can only connect once. * Otherwise, if connected, try to disconnect first. * This allows user to disconnect by connecting to, e.g., * a null address. */ if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && ((so->so_proto->pr_flags & PR_CONNREQUIRED) || (error = sodisconnect(so)))) error = EISCONN; else error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT, NULL, nam, NULL, curproc); return (error); } int soconnect2(struct socket *so1, struct socket *so2) { int s, error; s = solock(so1); error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, NULL, (struct mbuf *)so2, NULL, curproc); sounlock(so1, s); return (error); } int sodisconnect(struct socket *so) { int error; soassertlocked(so); if ((so->so_state & SS_ISCONNECTED) == 0) return (ENOTCONN); if (so->so_state & SS_ISDISCONNECTING) return (EALREADY); error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT, NULL, NULL, NULL, curproc); return (error); } int m_getuio(struct mbuf **, int, long, struct uio *); #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK) /* * Send on a socket. * If send must go all at once and message is larger than * send buffering, then hard error. * Lock against other senders. * If must go all at once and not enough room now, then * inform user that this would block and do nothing. * Otherwise, if nonblocking, send as much as possible. * The data to be sent is described by "uio" if nonzero, * otherwise by the mbuf chain "top" (which must be null * if uio is not). Data provided in mbuf chain must be small * enough to send all at once. * * Returns nonzero on error, timeout or signal; callers * must check for short counts if EINTR/ERESTART are returned. * Data and control buffers are freed on return. */ int sosend(struct socket *so, struct mbuf *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags) { long space, clen = 0; size_t resid; int error, s; int atomic = sosendallatonce(so) || top; if (uio) resid = uio->uio_resid; else resid = top->m_pkthdr.len; /* MSG_EOR on a SOCK_STREAM socket is invalid. */ if (so->so_type == SOCK_STREAM && (flags & MSG_EOR)) { m_freem(top); m_freem(control); return (EINVAL); } if (uio && uio->uio_procp) uio->uio_procp->p_ru.ru_msgsnd++; if (control) { /* * In theory clen should be unsigned (since control->m_len is). * However, space must be signed, as it might be less than 0 * if we over-committed, and we must use a signed comparison * of space and clen. 
*/ clen = control->m_len; /* reserve extra space for AF_UNIX's internalize */ if (so->so_proto->pr_domain->dom_family == AF_UNIX && clen >= CMSG_ALIGN(sizeof(struct cmsghdr)) && mtod(control, struct cmsghdr *)->cmsg_type == SCM_RIGHTS) clen = CMSG_SPACE( (clen - CMSG_ALIGN(sizeof(struct cmsghdr))) * (sizeof(struct fdpass) / sizeof(int))); } #define snderr(errno) { error = errno; goto release; } s = solock(so); restart: if ((error = sblock(so, &so->so_snd, SBLOCKWAIT(flags))) != 0) goto out; so->so_state |= SS_ISSENDING; do { if (so->so_state & SS_CANTSENDMORE) snderr(EPIPE); if (so->so_error) { error = so->so_error; so->so_error = 0; snderr(error); } if ((so->so_state & SS_ISCONNECTED) == 0) { if (so->so_proto->pr_flags & PR_CONNREQUIRED) { if (!(resid == 0 && clen != 0)) snderr(ENOTCONN); } else if (addr == 0) snderr(EDESTADDRREQ); } space = sbspace(so, &so->so_snd); if (flags & MSG_OOB) space += 1024; if (so->so_proto->pr_domain->dom_family == AF_UNIX) { if (atomic && resid > so->so_snd.sb_hiwat) snderr(EMSGSIZE); } else { if (clen > so->so_snd.sb_hiwat || (atomic && resid > so->so_snd.sb_hiwat - clen)) snderr(EMSGSIZE); } if (space < clen || (space - clen < resid && (atomic || space < so->so_snd.sb_lowat))) { if (flags & MSG_DONTWAIT) snderr(EWOULDBLOCK); sbunlock(so, &so->so_snd); error = sbwait(so, &so->so_snd); so->so_state &= ~SS_ISSENDING; if (error) goto out; goto restart; } space -= clen; do { if (uio == NULL) { /* * Data is prepackaged in "top". */ resid = 0; if (flags & MSG_EOR) top->m_flags |= M_EOR; } else { sounlock(so, s); error = m_getuio(&top, atomic, space, uio); s = solock(so); if (error) goto release; space -= top->m_pkthdr.len; resid = uio->uio_resid; if (flags & MSG_EOR) top->m_flags |= M_EOR; } if (resid == 0) so->so_state &= ~SS_ISSENDING; if (top && so->so_options & SO_ZEROIZE) top->m_flags |= M_ZEROIZE; error = (*so->so_proto->pr_usrreq)(so, (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND, top, addr, control, curproc); clen = 0; control = NULL; top = NULL; if (error) goto release; } while (resid && space > 0); } while (resid); release: so->so_state &= ~SS_ISSENDING; sbunlock(so, &so->so_snd); out: sounlock(so, s); m_freem(top); m_freem(control); return (error); } int m_getuio(struct mbuf **mp, int atomic, long space, struct uio *uio) { struct mbuf *m, *top = NULL; struct mbuf **nextp = &top; u_long len, mlen; size_t resid = uio->uio_resid; int error; do { if (top == NULL) { MGETHDR(m, M_WAIT, MT_DATA); mlen = MHLEN; m->m_pkthdr.len = 0; m->m_pkthdr.ph_ifidx = 0; } else { MGET(m, M_WAIT, MT_DATA); mlen = MLEN; } /* chain mbuf together */ *nextp = m; nextp = &m->m_next; resid = ulmin(resid, space); if (resid >= MINCLSIZE) { MCLGETL(m, M_NOWAIT, ulmin(resid, MAXMCLBYTES)); if ((m->m_flags & M_EXT) == 0) MCLGETL(m, M_NOWAIT, MCLBYTES); if ((m->m_flags & M_EXT) == 0) goto nopages; mlen = m->m_ext.ext_size; len = ulmin(mlen, resid); /* * For datagram protocols, leave room * for protocol headers in first mbuf. */ if (atomic && m == top && len < mlen - max_hdr) m->m_data += max_hdr; } else { nopages: len = ulmin(mlen, resid); /* * For datagram protocols, leave room * for protocol headers in first mbuf. */ if (atomic && m == top && len < mlen - max_hdr) m_align(m, len); } error = uiomove(mtod(m, caddr_t), len, uio); if (error) { m_freem(top); return (error); } /* adjust counters */ resid = uio->uio_resid; space -= len; m->m_len = len; top->m_pkthdr.len += len; /* Is there more space and more data? 
*/ } while (space > 0 && resid > 0); *mp = top; return 0; } /* * Following replacement or removal of the first mbuf on the first * mbuf chain of a socket buffer, push necessary state changes back * into the socket buffer so that other consumers see the values * consistently. 'nextrecord' is the callers locally stored value of * the original value of sb->sb_mb->m_nextpkt which must be restored * when the lead mbuf changes. NOTE: 'nextrecord' may be NULL. */ void sbsync(struct sockbuf *sb, struct mbuf *nextrecord) { /* * First, update for the new value of nextrecord. If necessary, * make it the first record. */ if (sb->sb_mb != NULL) sb->sb_mb->m_nextpkt = nextrecord; else sb->sb_mb = nextrecord; /* * Now update any dependent socket buffer fields to reflect * the new state. This is an inline of SB_EMPTY_FIXUP, with * the addition of a second clause that takes care of the * case where sb_mb has been updated, but remains the last * record. */ if (sb->sb_mb == NULL) { sb->sb_mbtail = NULL; sb->sb_lastrecord = NULL; } else if (sb->sb_mb->m_nextpkt == NULL) sb->sb_lastrecord = sb->sb_mb; } /* * Implement receive operations on a socket. * We depend on the way that records are added to the sockbuf * by sbappend*. In particular, each record (mbufs linked through m_next) * must begin with an address if the protocol so specifies, * followed by an optional mbuf or mbufs containing ancillary data, * and then zero or more mbufs of data. * In order to avoid blocking network for the entire time here, we release * the solock() while doing the actual copy to user space. * Although the sockbuf is locked, new data may still be appended, * and thus we must maintain consistency of the sockbuf during that time. * * The caller may receive the data as a single mbuf chain by supplying * an mbuf **mp0 for use in returning the chain. The uio is then used * only for the count in uio_resid. */ int soreceive(struct socket *so, struct mbuf **paddr, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp, socklen_t controllen) { struct mbuf *m, **mp; struct mbuf *cm; u_long len, offset, moff; int flags, error, s, type, uio_error = 0; const struct protosw *pr = so->so_proto; struct mbuf *nextrecord; size_t resid, orig_resid = uio->uio_resid; mp = mp0; if (paddr) *paddr = NULL; if (controlp) *controlp = NULL; if (flagsp) flags = *flagsp &~ MSG_EOR; else flags = 0; if (flags & MSG_OOB) { m = m_get(M_WAIT, MT_DATA); s = solock(so); error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m, (struct mbuf *)(long)(flags & MSG_PEEK), NULL, curproc); sounlock(so, s); if (error) goto bad; do { error = uiomove(mtod(m, caddr_t), ulmin(uio->uio_resid, m->m_len), uio); m = m_free(m); } while (uio->uio_resid && error == 0 && m); bad: m_freem(m); return (error); } if (mp) *mp = NULL; s = solock(so); restart: if ((error = sblock(so, &so->so_rcv, SBLOCKWAIT(flags))) != 0) { sounlock(so, s); return (error); } m = so->so_rcv.sb_mb; #ifdef SOCKET_SPLICE if (isspliced(so)) m = NULL; #endif /* SOCKET_SPLICE */ /* * If we have less data than requested, block awaiting more * (subject to any timeout) if: * 1. the current count is less than the low water mark, * 2. MSG_WAITALL is set, and it is possible to do the entire * receive operation at once if we block (resid <= hiwat), or * 3. MSG_DONTWAIT is not set. * If MSG_WAITALL is set but resid is larger than the receive buffer, * we have to do the receive in sections, and thus risk returning * a short count if a timeout or signal occurs after we start. 
*/ if (m == NULL || (((flags & MSG_DONTWAIT) == 0 && so->so_rcv.sb_cc < uio->uio_resid) && (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) && m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) { #ifdef DIAGNOSTIC if (m == NULL && so->so_rcv.sb_cc) #ifdef SOCKET_SPLICE if (!isspliced(so)) #endif /* SOCKET_SPLICE */ panic("receive 1: so %p, so_type %d, sb_cc %lu", so, so->so_type, so->so_rcv.sb_cc); #endif if (so->so_error) { if (m) goto dontblock; error = so->so_error; if ((flags & MSG_PEEK) == 0) so->so_error = 0; goto release; } if (so->so_state & SS_CANTRCVMORE) { if (m) goto dontblock; else if (so->so_rcv.sb_cc == 0) goto release; } for (; m; m = m->m_next) if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { m = so->so_rcv.sb_mb; goto dontblock; } if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && (so->so_proto->pr_flags & PR_CONNREQUIRED)) { error = ENOTCONN; goto release; } if (uio->uio_resid == 0 && controlp == NULL) goto release; if (flags & MSG_DONTWAIT) { error = EWOULDBLOCK; goto release; } SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1"); SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1"); sbunlock(so, &so->so_rcv); error = sbwait(so, &so->so_rcv); if (error) { sounlock(so, s); return (error); } goto restart; } dontblock: /* * On entry here, m points to the first record of the socket buffer. * From this point onward, we maintain 'nextrecord' as a cache of the * pointer to the next record in the socket buffer. We must keep the * various socket buffer pointers and local stack versions of the * pointers in sync, pushing out modifications before operations that * may sleep, and re-reading them afterwards. * * Otherwise, we will race with the network stack appending new data * or records onto the socket buffer by using inconsistent/stale * versions of the field, possibly resulting in socket buffer * corruption. */ if (uio->uio_procp) uio->uio_procp->p_ru.ru_msgrcv++; KASSERT(m == so->so_rcv.sb_mb); SBLASTRECORDCHK(&so->so_rcv, "soreceive 1"); SBLASTMBUFCHK(&so->so_rcv, "soreceive 1"); nextrecord = m->m_nextpkt; if (pr->pr_flags & PR_ADDR) { #ifdef DIAGNOSTIC if (m->m_type != MT_SONAME) panic("receive 1a: so %p, so_type %d, m %p, m_type %d", so, so->so_type, m, m->m_type); #endif orig_resid = 0; if (flags & MSG_PEEK) { if (paddr) *paddr = m_copym(m, 0, m->m_len, M_NOWAIT); m = m->m_next; } else { sbfree(&so->so_rcv, m); if (paddr) { *paddr = m; so->so_rcv.sb_mb = m->m_next; m->m_next = 0; m = so->so_rcv.sb_mb; } else { so->so_rcv.sb_mb = m_free(m); m = so->so_rcv.sb_mb; } sbsync(&so->so_rcv, nextrecord); } } while (m && m->m_type == MT_CONTROL && error == 0) { int skip = 0; if (flags & MSG_PEEK) { if (mtod(m, struct cmsghdr *)->cmsg_type == SCM_RIGHTS) { /* don't leak internalized SCM_RIGHTS msgs */ skip = 1; } else if (controlp) *controlp = m_copym(m, 0, m->m_len, M_NOWAIT); m = m->m_next; } else { sbfree(&so->so_rcv, m); so->so_rcv.sb_mb = m->m_next; m->m_nextpkt = m->m_next = NULL; cm = m; m = so->so_rcv.sb_mb; sbsync(&so->so_rcv, nextrecord); if (controlp) { if (pr->pr_domain->dom_externalize) { error = (*pr->pr_domain->dom_externalize) (cm, controllen, flags); } *controlp = cm; } else { /* * Dispose of any SCM_RIGHTS message that went * through the read path rather than recv. 
*/ if (pr->pr_domain->dom_dispose) pr->pr_domain->dom_dispose(cm); m_free(cm); } } if (m != NULL) nextrecord = so->so_rcv.sb_mb->m_nextpkt; else nextrecord = so->so_rcv.sb_mb; if (controlp && !skip) controlp = &(*controlp)->m_next; orig_resid = 0; } /* If m is non-NULL, we have some data to read. */ if (m) { type = m->m_type; if (type == MT_OOBDATA) flags |= MSG_OOB; if (m->m_flags & M_BCAST) flags |= MSG_BCAST; if (m->m_flags & M_MCAST) flags |= MSG_MCAST; } SBLASTRECORDCHK(&so->so_rcv, "soreceive 2"); SBLASTMBUFCHK(&so->so_rcv, "soreceive 2"); moff = 0; offset = 0; while (m && uio->uio_resid > 0 && error == 0) { if (m->m_type == MT_OOBDATA) { if (type != MT_OOBDATA) break; } else if (type == MT_OOBDATA) { break; } else if (m->m_type == MT_CONTROL) { /* * If there is more than one control message in the * stream, we do a short read. Next can be received * or disposed by another system call. */ break; #ifdef DIAGNOSTIC } else if (m->m_type != MT_DATA && m->m_type != MT_HEADER) { panic("receive 3: so %p, so_type %d, m %p, m_type %d", so, so->so_type, m, m->m_type); #endif } so->so_state &= ~SS_RCVATMARK; len = uio->uio_resid; if (so->so_oobmark && len > so->so_oobmark - offset) len = so->so_oobmark - offset; if (len > m->m_len - moff) len = m->m_len - moff; /* * If mp is set, just pass back the mbufs. * Otherwise copy them out via the uio, then free. * Sockbuf must be consistent here (points to current mbuf, * it points to next record) when we drop priority; * we must note any additions to the sockbuf when we * block interrupts again. */ if (mp == NULL && uio_error == 0) { SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove"); SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove"); resid = uio->uio_resid; sounlock(so, s); uio_error = uiomove(mtod(m, caddr_t) + moff, len, uio); s = solock(so); if (uio_error) uio->uio_resid = resid - len; } else uio->uio_resid -= len; if (len == m->m_len - moff) { if (m->m_flags & M_EOR) flags |= MSG_EOR; if (flags & MSG_PEEK) { m = m->m_next; moff = 0; orig_resid = 0; } else { nextrecord = m->m_nextpkt; sbfree(&so->so_rcv, m); if (mp) { *mp = m; mp = &m->m_next; so->so_rcv.sb_mb = m = m->m_next; *mp = NULL; } else { so->so_rcv.sb_mb = m_free(m); m = so->so_rcv.sb_mb; } /* * If m != NULL, we also know that * so->so_rcv.sb_mb != NULL. */ KASSERT(so->so_rcv.sb_mb == m); if (m) { m->m_nextpkt = nextrecord; if (nextrecord == NULL) so->so_rcv.sb_lastrecord = m; } else { so->so_rcv.sb_mb = nextrecord; SB_EMPTY_FIXUP(&so->so_rcv); } SBLASTRECORDCHK(&so->so_rcv, "soreceive 3"); SBLASTMBUFCHK(&so->so_rcv, "soreceive 3"); } } else { if (flags & MSG_PEEK) { moff += len; orig_resid = 0; } else { if (mp) *mp = m_copym(m, 0, len, M_WAIT); m->m_data += len; m->m_len -= len; so->so_rcv.sb_cc -= len; so->so_rcv.sb_datacc -= len; } } if (so->so_oobmark) { if ((flags & MSG_PEEK) == 0) { so->so_oobmark -= len; if (so->so_oobmark == 0) { so->so_state |= SS_RCVATMARK; break; } } else { offset += len; if (offset == so->so_oobmark) break; } } if (flags & MSG_EOR) break; /* * If the MSG_WAITALL flag is set (for non-atomic socket), * we must not quit until "uio->uio_resid == 0" or an error * termination. If a signal/timeout occurs, return * with a short count but without error. * Keep sockbuf locked against other readers. 
*/ while (flags & MSG_WAITALL && m == NULL && uio->uio_resid > 0 && !sosendallatonce(so) && !nextrecord) { if (so->so_error || so->so_state & SS_CANTRCVMORE) break; SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2"); SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2"); error = sbwait(so, &so->so_rcv); if (error) { sbunlock(so, &so->so_rcv); sounlock(so, s); return (0); } if ((m = so->so_rcv.sb_mb) != NULL) nextrecord = m->m_nextpkt; } } if (m && pr->pr_flags & PR_ATOMIC) { flags |= MSG_TRUNC; if ((flags & MSG_PEEK) == 0) (void) sbdroprecord(&so->so_rcv); } if ((flags & MSG_PEEK) == 0) { if (m == NULL) { /* * First part is an inline SB_EMPTY_FIXUP(). Second * part makes sure sb_lastrecord is up-to-date if * there is still data in the socket buffer. */ so->so_rcv.sb_mb = nextrecord; if (so->so_rcv.sb_mb == NULL) { so->so_rcv.sb_mbtail = NULL; so->so_rcv.sb_lastrecord = NULL; } else if (nextrecord->m_nextpkt == NULL) so->so_rcv.sb_lastrecord = nextrecord; } SBLASTRECORDCHK(&so->so_rcv, "soreceive 4"); SBLASTMBUFCHK(&so->so_rcv, "soreceive 4"); if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) (*pr->pr_usrreq)(so, PRU_RCVD, NULL, (struct mbuf *)(long)flags, NULL, curproc); } if (orig_resid == uio->uio_resid && orig_resid && (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { sbunlock(so, &so->so_rcv); goto restart; } if (uio_error) error = uio_error; if (flagsp) *flagsp |= flags; release: sbunlock(so, &so->so_rcv); sounlock(so, s); return (error); } int soshutdown(struct socket *so, int how) { const struct protosw *pr = so->so_proto; int s, error = 0; s = solock(so); switch (how) { case SHUT_RD: sorflush(so); break; case SHUT_RDWR: sorflush(so); /* FALLTHROUGH */ case SHUT_WR: error = (*pr->pr_usrreq)(so, PRU_SHUTDOWN, NULL, NULL, NULL, curproc); break; default: error = EINVAL; break; } sounlock(so, s); return (error); } void sorflush(struct socket *so) { struct sockbuf *sb = &so->so_rcv; struct mbuf *m; const struct protosw *pr = so->so_proto; int error; sb->sb_flags |= SB_NOINTR; error = sblock(so, sb, M_WAITOK); /* with SB_NOINTR and M_WAITOK sblock() must not fail */ KASSERT(error == 0); socantrcvmore(so); sbunlock(so, sb); m = sb->sb_mb; memset(&sb->sb_startzero, 0, (caddr_t)&sb->sb_endzero - (caddr_t)&sb->sb_startzero); sb->sb_timeo_nsecs = INFSLP; if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose) (*pr->pr_domain->dom_dispose)(m); m_purge(m); } #ifdef SOCKET_SPLICE #define so_splicelen so_sp->ssp_len #define so_splicemax so_sp->ssp_max #define so_idletv so_sp->ssp_idletv #define so_idleto so_sp->ssp_idleto #define so_splicetask so_sp->ssp_task int sosplice(struct socket *so, int fd, off_t max, struct timeval *tv) { struct file *fp; struct socket *sosp; struct sosplice *sp; struct taskq *tq; int error = 0; soassertlocked(so); if (sosplice_taskq == NULL) { rw_enter_write(&sosplice_lock); if (sosplice_taskq == NULL) { tq = taskq_create("sosplice", 1, IPL_SOFTNET, TASKQ_MPSAFE); /* Ensure the taskq is fully visible to other CPUs. 
*/ membar_producer(); sosplice_taskq = tq; } rw_exit_write(&sosplice_lock); } if (sosplice_taskq == NULL) return (ENOMEM); if ((so->so_proto->pr_flags & PR_SPLICE) == 0) return (EPROTONOSUPPORT); if (so->so_options & SO_ACCEPTCONN) return (EOPNOTSUPP); if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && (so->so_proto->pr_flags & PR_CONNREQUIRED)) return (ENOTCONN); if (so->so_sp == NULL) { sp = pool_get(&sosplice_pool, PR_WAITOK | PR_ZERO); if (so->so_sp == NULL) so->so_sp = sp; else pool_put(&sosplice_pool, sp); } /* If no fd is given, unsplice by removing existing link. */ if (fd < 0) { /* Lock receive buffer. */ if ((error = sblock(so, &so->so_rcv, M_WAITOK)) != 0) { return (error); } if (so->so_sp->ssp_socket) sounsplice(so, so->so_sp->ssp_socket, 0); sbunlock(so, &so->so_rcv); return (0); } if (max && max < 0) return (EINVAL); if (tv && (tv->tv_sec < 0 || !timerisvalid(tv))) return (EINVAL); /* Find sosp, the drain socket where data will be spliced into. */ if ((error = getsock(curproc, fd, &fp)) != 0) return (error); sosp = fp->f_data; if (sosp->so_proto->pr_usrreq != so->so_proto->pr_usrreq) { error = EPROTONOSUPPORT; goto frele; } if (sosp->so_sp == NULL) { sp = pool_get(&sosplice_pool, PR_WAITOK | PR_ZERO); if (sosp->so_sp == NULL) sosp->so_sp = sp; else pool_put(&sosplice_pool, sp); } /* Lock both receive and send buffer. */ if ((error = sblock(so, &so->so_rcv, M_WAITOK)) != 0) { goto frele; } if ((error = sblock(so, &sosp->so_snd, M_WAITOK)) != 0) { sbunlock(so, &so->so_rcv); goto frele; } if (so->so_sp->ssp_socket || sosp->so_sp->ssp_soback) { error = EBUSY; goto release; } if (sosp->so_options & SO_ACCEPTCONN) { error = EOPNOTSUPP; goto release; } if ((sosp->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0) { error = ENOTCONN; goto release; } /* Splice so and sosp together. */ so->so_sp->ssp_socket = sosp; sosp->so_sp->ssp_soback = so; so->so_splicelen = 0; so->so_splicemax = max; if (tv) so->so_idletv = *tv; else timerclear(&so->so_idletv); timeout_set_proc(&so->so_idleto, soidle, so); task_set(&so->so_splicetask, sotask, so); /* * To prevent softnet interrupt from calling somove() while * we sleep, the socket buffers are not marked as spliced yet. */ if (somove(so, M_WAIT)) { so->so_rcv.sb_flags |= SB_SPLICE; sosp->so_snd.sb_flags |= SB_SPLICE; } release: sbunlock(sosp, &sosp->so_snd); sbunlock(so, &so->so_rcv); frele: /* * FRELE() must not be called with the socket lock held. It is safe to * release the lock here as long as no other operation happen on the * socket when sosplice() returns. The dance could be avoided by * grabbing the socket lock inside this function. */ sounlock(so, SL_LOCKED); FRELE(fp, curproc); solock(so); return (error); } void sounsplice(struct socket *so, struct socket *sosp, int freeing) { soassertlocked(so); task_del(sosplice_taskq, &so->so_splicetask); timeout_del(&so->so_idleto); sosp->so_snd.sb_flags &= ~SB_SPLICE; so->so_rcv.sb_flags &= ~SB_SPLICE; so->so_sp->ssp_socket = sosp->so_sp->ssp_soback = NULL; /* Do not wakeup a socket that is about to be freed. 
*/ if ((freeing & SOSP_FREEING_READ) == 0 && soreadable(so)) sorwakeup(so); if ((freeing & SOSP_FREEING_WRITE) == 0 && sowriteable(sosp)) sowwakeup(sosp); } void soidle(void *arg) { struct socket *so = arg; int s; s = solock(so); if (so->so_rcv.sb_flags & SB_SPLICE) { so->so_error = ETIMEDOUT; sounsplice(so, so->so_sp->ssp_socket, 0); } sounlock(so, s); } void sotask(void *arg) { struct socket *so = arg; int s; s = solock(so); if (so->so_rcv.sb_flags & SB_SPLICE) { /* * We may not sleep here as sofree() and unsplice() may be * called from softnet interrupt context. This would remove * the socket during somove(). */ somove(so, M_DONTWAIT); } sounlock(so, s); /* Avoid user land starvation. */ yield(); } /* * The socket splicing task or idle timeout may sleep while grabbing the net * lock. As sofree() can be called anytime, sotask() or soidle() could access * the socket memory of a freed socket after wakeup. So delay the pool_put() * after all pending socket splicing tasks or timeouts have finished. Do this * by scheduling it on the same threads. */ void soreaper(void *arg) { struct socket *so = arg; /* Reuse splice task, sounsplice() has been called before. */ task_set(&so->so_sp->ssp_task, soput, so); task_add(sosplice_taskq, &so->so_sp->ssp_task); } void soput(void *arg) { struct socket *so = arg; pool_put(&sosplice_pool, so->so_sp); pool_put(&socket_pool, so); } /* * Move data from receive buffer of spliced source socket to send * buffer of drain socket. Try to move as much as possible in one * big chunk. It is a TCP only implementation. * Return value 0 means splicing has been finished, 1 continue. */ int somove(struct socket *so, int wait) { struct socket *sosp = so->so_sp->ssp_socket; struct mbuf *m, **mp, *nextrecord; u_long len, off, oobmark; long space; int error = 0, maxreached = 0; unsigned int state; soassertlocked(so); nextpkt: if (so->so_error) { error = so->so_error; goto release; } if (sosp->so_state & SS_CANTSENDMORE) { error = EPIPE; goto release; } if (sosp->so_error && sosp->so_error != ETIMEDOUT && sosp->so_error != EFBIG && sosp->so_error != ELOOP) { error = sosp->so_error; goto release; } if ((sosp->so_state & SS_ISCONNECTED) == 0) goto release; /* Calculate how many bytes can be copied now. */ len = so->so_rcv.sb_datacc; if (so->so_splicemax) { KASSERT(so->so_splicelen < so->so_splicemax); if (so->so_splicemax <= so->so_splicelen + len) { len = so->so_splicemax - so->so_splicelen; maxreached = 1; } } space = sbspace(sosp, &sosp->so_snd); if (so->so_oobmark && so->so_oobmark < len && so->so_oobmark < space + 1024) space += 1024; if (space <= 0) { maxreached = 0; goto release; } if (space < len) { maxreached = 0; if (space < sosp->so_snd.sb_lowat) goto release; len = space; } sosp->so_state |= SS_ISSENDING; SBLASTRECORDCHK(&so->so_rcv, "somove 1"); SBLASTMBUFCHK(&so->so_rcv, "somove 1"); m = so->so_rcv.sb_mb; if (m == NULL) goto release; nextrecord = m->m_nextpkt; /* Drop address and control information not used with splicing. */ if (so->so_proto->pr_flags & PR_ADDR) { #ifdef DIAGNOSTIC if (m->m_type != MT_SONAME) panic("somove soname: so %p, so_type %d, m %p, " "m_type %d", so, so->so_type, m, m->m_type); #endif m = m->m_next; } while (m && m->m_type == MT_CONTROL) m = m->m_next; if (m == NULL) { sbdroprecord(&so->so_rcv); if (so->so_proto->pr_flags & PR_WANTRCVD && so->so_pcb) (so->so_proto->pr_usrreq)(so, PRU_RCVD, NULL, NULL, NULL, NULL); goto nextpkt; } /* * By splicing sockets connected to localhost, userland might create a * loop. 
Dissolve splicing with error if loop is detected by counter. * * If we deal with looped broadcast/multicast packet we bail out with * no error to suppress splice termination. */ if ((m->m_flags & M_PKTHDR) && ((m->m_pkthdr.ph_loopcnt++ >= M_MAXLOOP) || ((m->m_flags & M_LOOP) && (m->m_flags & (M_BCAST|M_MCAST))))) { error = ELOOP; goto release; } if (so->so_proto->pr_flags & PR_ATOMIC) { if ((m->m_flags & M_PKTHDR) == 0) panic("somove !PKTHDR: so %p, so_type %d, m %p, " "m_type %d", so, so->so_type, m, m->m_type); if (sosp->so_snd.sb_hiwat < m->m_pkthdr.len) { error = EMSGSIZE; goto release; } if (len < m->m_pkthdr.len) goto release; if (m->m_pkthdr.len < len) { maxreached = 0; len = m->m_pkthdr.len; } /* * Throw away the name mbuf after it has been assured * that the whole first record can be processed. */ m = so->so_rcv.sb_mb; sbfree(&so->so_rcv, m); so->so_rcv.sb_mb = m_free(m); sbsync(&so->so_rcv, nextrecord); } /* * Throw away the control mbufs after it has been assured * that the whole first record can be processed. */ m = so->so_rcv.sb_mb; while (m && m->m_type == MT_CONTROL) { sbfree(&so->so_rcv, m); so->so_rcv.sb_mb = m_free(m); m = so->so_rcv.sb_mb; sbsync(&so->so_rcv, nextrecord); } SBLASTRECORDCHK(&so->so_rcv, "somove 2"); SBLASTMBUFCHK(&so->so_rcv, "somove 2"); /* Take at most len mbufs out of receive buffer. */ for (off = 0, mp = &m; off <= len && *mp; off += (*mp)->m_len, mp = &(*mp)->m_next) { u_long size = len - off; #ifdef DIAGNOSTIC if ((*mp)->m_type != MT_DATA && (*mp)->m_type != MT_HEADER) panic("somove type: so %p, so_type %d, m %p, " "m_type %d", so, so->so_type, *mp, (*mp)->m_type); #endif if ((*mp)->m_len > size) { /* * Move only a partial mbuf at maximum splice length or * if the drain buffer is too small for this large mbuf. */ if (!maxreached && so->so_snd.sb_datacc > 0) { len -= size; break; } *mp = m_copym(so->so_rcv.sb_mb, 0, size, wait); if (*mp == NULL) { len -= size; break; } so->so_rcv.sb_mb->m_data += size; so->so_rcv.sb_mb->m_len -= size; so->so_rcv.sb_cc -= size; so->so_rcv.sb_datacc -= size; } else { *mp = so->so_rcv.sb_mb; sbfree(&so->so_rcv, *mp); so->so_rcv.sb_mb = (*mp)->m_next; sbsync(&so->so_rcv, nextrecord); } } *mp = NULL; SBLASTRECORDCHK(&so->so_rcv, "somove 3"); SBLASTMBUFCHK(&so->so_rcv, "somove 3"); SBCHECK(&so->so_rcv); if (m == NULL) goto release; m->m_nextpkt = NULL; if (m->m_flags & M_PKTHDR) { m_resethdr(m); m->m_pkthdr.len = len; } /* Send window update to source peer as receive buffer has changed. */ if (so->so_proto->pr_flags & PR_WANTRCVD && so->so_pcb) (so->so_proto->pr_usrreq)(so, PRU_RCVD, NULL, NULL, NULL, NULL); /* Receive buffer did shrink by len bytes, adjust oob. */ state = so->so_state; so->so_state &= ~SS_RCVATMARK; oobmark = so->so_oobmark; so->so_oobmark = oobmark > len ? oobmark - len : 0; if (oobmark) { if (oobmark == len) so->so_state |= SS_RCVATMARK; if (oobmark >= len) oobmark = 0; } /* * Handle oob data. If any malloc fails, ignore error. * TCP urgent data is not very reliable anyway. 
*/ while (((state & SS_RCVATMARK) || oobmark) && (so->so_options & SO_OOBINLINE)) { struct mbuf *o = NULL; if (state & SS_RCVATMARK) { o = m_get(wait, MT_DATA); state &= ~SS_RCVATMARK; } else if (oobmark) { o = m_split(m, oobmark, wait); if (o) { error = (*sosp->so_proto->pr_usrreq)(sosp, PRU_SEND, m, NULL, NULL, NULL); if (error) { if (sosp->so_state & SS_CANTSENDMORE) error = EPIPE; m_freem(o); goto release; } len -= oobmark; so->so_splicelen += oobmark; m = o; o = m_get(wait, MT_DATA); } oobmark = 0; } if (o) { o->m_len = 1; *mtod(o, caddr_t) = *mtod(m, caddr_t); error = (*sosp->so_proto->pr_usrreq)(sosp, PRU_SENDOOB, o, NULL, NULL, NULL); if (error) { if (sosp->so_state & SS_CANTSENDMORE) error = EPIPE; m_freem(m); goto release; } len -= 1; so->so_splicelen += 1; if (oobmark) { oobmark -= 1; if (oobmark == 0) state |= SS_RCVATMARK; } m_adj(m, 1); } } /* Append all remaining data to drain socket. */ if (so->so_rcv.sb_cc == 0 || maxreached) sosp->so_state &= ~SS_ISSENDING; error = (*sosp->so_proto->pr_usrreq)(sosp, PRU_SEND, m, NULL, NULL, NULL); if (error) { if (sosp->so_state & SS_CANTSENDMORE) error = EPIPE; goto release; } so->so_splicelen += len; /* Move several packets if possible. */ if (!maxreached && nextrecord) goto nextpkt; release: sosp->so_state &= ~SS_ISSENDING; if (!error && maxreached && so->so_splicemax == so->so_splicelen) error = EFBIG; if (error) so->so_error = error; if (((so->so_state & SS_CANTRCVMORE) && so->so_rcv.sb_cc == 0) || (sosp->so_state & SS_CANTSENDMORE) || maxreached || error) { sounsplice(so, sosp, 0); return (0); } if (timerisset(&so->so_idletv)) timeout_add_tv(&so->so_idleto, &so->so_idletv); return (1); } #endif /* SOCKET_SPLICE */ void sorwakeup(struct socket *so) { soassertlocked(so); #ifdef SOCKET_SPLICE if (so->so_rcv.sb_flags & SB_SPLICE) { /* * TCP has a sendbuffer that can handle multiple packets * at once. So queue the stream a bit to accumulate data. * The sosplice thread will call somove() later and send * the packets calling tcp_output() only once. * In the UDP case, send out the packets immediately. * Using a thread would make things slower. 
*/ if (so->so_proto->pr_flags & PR_WANTRCVD) task_add(sosplice_taskq, &so->so_splicetask); else somove(so, M_DONTWAIT); } if (isspliced(so)) return; #endif sowakeup(so, &so->so_rcv); if (so->so_upcall) (*(so->so_upcall))(so, so->so_upcallarg, M_DONTWAIT); } void sowwakeup(struct socket *so) { soassertlocked(so); #ifdef SOCKET_SPLICE if (so->so_snd.sb_flags & SB_SPLICE) task_add(sosplice_taskq, &so->so_sp->ssp_soback->so_splicetask); if (issplicedback(so)) return; #endif sowakeup(so, &so->so_snd); } int sosetopt(struct socket *so, int level, int optname, struct mbuf *m) { int error = 0; soassertlocked(so); if (level != SOL_SOCKET) { if (so->so_proto->pr_ctloutput) { error = (*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so, level, optname, m); return (error); } error = ENOPROTOOPT; } else { switch (optname) { case SO_BINDANY: if ((error = suser(curproc)) != 0) /* XXX */ return (error); break; } switch (optname) { case SO_LINGER: if (m == NULL || m->m_len != sizeof (struct linger) || mtod(m, struct linger *)->l_linger < 0 || mtod(m, struct linger *)->l_linger > SHRT_MAX) return (EINVAL); so->so_linger = mtod(m, struct linger *)->l_linger; /* FALLTHROUGH */ case SO_BINDANY: case SO_DEBUG: case SO_KEEPALIVE: case SO_USELOOPBACK: case SO_BROADCAST: case SO_REUSEADDR: case SO_REUSEPORT: case SO_OOBINLINE: case SO_TIMESTAMP: case SO_ZEROIZE: if (m == NULL || m->m_len < sizeof (int)) return (EINVAL); if (*mtod(m, int *)) so->so_options |= optname; else so->so_options &= ~optname; break; case SO_DONTROUTE: if (m == NULL || m->m_len < sizeof (int)) return (EINVAL); if (*mtod(m, int *)) error = EOPNOTSUPP; break; case SO_SNDBUF: case SO_RCVBUF: case SO_SNDLOWAT: case SO_RCVLOWAT: { u_long cnt; if (m == NULL || m->m_len < sizeof (int)) return (EINVAL); cnt = *mtod(m, int *); if ((long)cnt <= 0) cnt = 1; switch (optname) { case SO_SNDBUF: if (so->so_state & SS_CANTSENDMORE) return (EINVAL); if (sbcheckreserve(cnt, so->so_snd.sb_wat) || sbreserve(so, &so->so_snd, cnt)) return (ENOBUFS); so->so_snd.sb_wat = cnt; break; case SO_RCVBUF: if (so->so_state & SS_CANTRCVMORE) return (EINVAL); if (sbcheckreserve(cnt, so->so_rcv.sb_wat) || sbreserve(so, &so->so_rcv, cnt)) return (ENOBUFS); so->so_rcv.sb_wat = cnt; break; case SO_SNDLOWAT: so->so_snd.sb_lowat = (cnt > so->so_snd.sb_hiwat) ? so->so_snd.sb_hiwat : cnt; break; case SO_RCVLOWAT: so->so_rcv.sb_lowat = (cnt > so->so_rcv.sb_hiwat) ? 
so->so_rcv.sb_hiwat : cnt; break; } break; } case SO_SNDTIMEO: case SO_RCVTIMEO: { struct timeval tv; uint64_t nsecs; if (m == NULL || m->m_len < sizeof (tv)) return (EINVAL); memcpy(&tv, mtod(m, struct timeval *), sizeof tv); if (!timerisvalid(&tv)) return (EINVAL); nsecs = TIMEVAL_TO_NSEC(&tv); if (nsecs == UINT64_MAX) return (EDOM); if (nsecs == 0) nsecs = INFSLP; switch (optname) { case SO_SNDTIMEO: so->so_snd.sb_timeo_nsecs = nsecs; break; case SO_RCVTIMEO: so->so_rcv.sb_timeo_nsecs = nsecs; break; } break; } case SO_RTABLE: if (so->so_proto->pr_domain && so->so_proto->pr_domain->dom_protosw && so->so_proto->pr_ctloutput) { struct domain *dom = so->so_proto->pr_domain; level = dom->dom_protosw->pr_protocol; error = (*so->so_proto->pr_ctloutput) (PRCO_SETOPT, so, level, optname, m); return (error); } error = ENOPROTOOPT; break; #ifdef SOCKET_SPLICE case SO_SPLICE: if (m == NULL) { error = sosplice(so, -1, 0, NULL); } else if (m->m_len < sizeof(int)) { return (EINVAL); } else if (m->m_len < sizeof(struct splice)) { error = sosplice(so, *mtod(m, int *), 0, NULL); } else { error = sosplice(so, mtod(m, struct splice *)->sp_fd, mtod(m, struct splice *)->sp_max, &mtod(m, struct splice *)->sp_idle); } break; #endif /* SOCKET_SPLICE */ default: error = ENOPROTOOPT; break; } if (error == 0 && so->so_proto->pr_ctloutput) { (*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so, level, optname, m); } } return (error); } int sogetopt(struct socket *so, int level, int optname, struct mbuf *m) { int error = 0; soassertlocked(so); if (level != SOL_SOCKET) { if (so->so_proto->pr_ctloutput) { m->m_len = 0; error = (*so->so_proto->pr_ctloutput)(PRCO_GETOPT, so, level, optname, m); if (error) return (error); return (0); } else return (ENOPROTOOPT); } else { m->m_len = sizeof (int); switch (optname) { case SO_LINGER: m->m_len = sizeof (struct linger); mtod(m, struct linger *)->l_onoff = so->so_options & SO_LINGER; mtod(m, struct linger *)->l_linger = so->so_linger; break; case SO_BINDANY: case SO_USELOOPBACK: case SO_DEBUG: case SO_KEEPALIVE: case SO_REUSEADDR: case SO_REUSEPORT: case SO_BROADCAST: case SO_OOBINLINE: case SO_TIMESTAMP: case SO_ZEROIZE: *mtod(m, int *) = so->so_options & optname; break; case SO_DONTROUTE: *mtod(m, int *) = 0; break; case SO_TYPE: *mtod(m, int *) = so->so_type; break; case SO_ERROR: *mtod(m, int *) = so->so_error; so->so_error = 0; break; case SO_DOMAIN: *mtod(m, int *) = so->so_proto->pr_domain->dom_family; break; case SO_PROTOCOL: *mtod(m, int *) = so->so_proto->pr_protocol; break; case SO_SNDBUF: *mtod(m, int *) = so->so_snd.sb_hiwat; break; case SO_RCVBUF: *mtod(m, int *) = so->so_rcv.sb_hiwat; break; case SO_SNDLOWAT: *mtod(m, int *) = so->so_snd.sb_lowat; break; case SO_RCVLOWAT: *mtod(m, int *) = so->so_rcv.sb_lowat; break; case SO_SNDTIMEO: case SO_RCVTIMEO: { struct timeval tv; uint64_t nsecs = (optname == SO_SNDTIMEO ? so->so_snd.sb_timeo_nsecs : so->so_rcv.sb_timeo_nsecs); m->m_len = sizeof(struct timeval); memset(&tv, 0, sizeof(tv)); if (nsecs != INFSLP) NSEC_TO_TIMEVAL(nsecs, &tv); memcpy(mtod(m, struct timeval *), &tv, sizeof tv); break; } case SO_RTABLE: if (so->so_proto->pr_domain && so->so_proto->pr_domain->dom_protosw && so->so_proto->pr_ctloutput) { struct domain *dom = so->so_proto->pr_domain; level = dom->dom_protosw->pr_protocol; error = (*so->so_proto->pr_ctloutput) (PRCO_GETOPT, so, level, optname, m); if (error) return (error); break; } return (ENOPROTOOPT); #ifdef SOCKET_SPLICE case SO_SPLICE: { off_t len; m->m_len = sizeof(off_t); len = so->so_sp ? 
so->so_sp->ssp_len : 0; memcpy(mtod(m, off_t *), &len, sizeof(off_t)); break; } #endif /* SOCKET_SPLICE */ case SO_PEERCRED: if (so->so_proto->pr_protocol == AF_UNIX) { struct unpcb *unp = sotounpcb(so); if (unp->unp_flags & UNP_FEIDS) { m->m_len = sizeof(unp->unp_connid); memcpy(mtod(m, caddr_t), &(unp->unp_connid), m->m_len); break; } return (ENOTCONN); } return (EOPNOTSUPP); default: return (ENOPROTOOPT); } return (0); } } void sohasoutofband(struct socket *so) { pgsigio(&so->so_sigio, SIGURG, 0); selwakeup(&so->so_rcv.sb_sel); } int soo_kqfilter(struct file *fp, struct knote *kn) { struct socket *so = kn->kn_fp->f_data; struct sockbuf *sb; KERNEL_ASSERT_LOCKED(); switch (kn->kn_filter) { case EVFILT_READ: if (so->so_options & SO_ACCEPTCONN) kn->kn_fop = &solisten_filtops; else kn->kn_fop = &soread_filtops; sb = &so->so_rcv; break; case EVFILT_WRITE: kn->kn_fop = &sowrite_filtops; sb = &so->so_snd; break; case EVFILT_EXCEPT: kn->kn_fop = &soexcept_filtops; sb = &so->so_rcv; break; default: return (EINVAL); } klist_insert_locked(&sb->sb_sel.si_note, kn); return (0); } void filt_sordetach(struct knote *kn) { struct socket *so = kn->kn_fp->f_data; KERNEL_ASSERT_LOCKED(); klist_remove_locked(&so->so_rcv.sb_sel.si_note, kn); } int filt_soread(struct knote *kn, long hint) { struct socket *so = kn->kn_fp->f_data; int s, rv = 0; if ((hint & NOTE_SUBMIT) == 0) s = solock(so); kn->kn_data = so->so_rcv.sb_cc; #ifdef SOCKET_SPLICE if (isspliced(so)) { rv = 0; } else #endif /* SOCKET_SPLICE */ if (kn->kn_sfflags & NOTE_OOB) { if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) { kn->kn_fflags |= NOTE_OOB; kn->kn_data -= so->so_oobmark; rv = 1; } } else if (so->so_state & SS_CANTRCVMORE) { kn->kn_flags |= EV_EOF; if (kn->kn_flags & __EV_POLL) { if (so->so_state & SS_ISDISCONNECTED) kn->kn_flags |= __EV_HUP; } kn->kn_fflags = so->so_error; rv = 1; } else if (so->so_error) { /* temporary udp error */ rv = 1; } else if (kn->kn_sfflags & NOTE_LOWAT) { rv = (kn->kn_data >= kn->kn_sdata); } else { rv = (kn->kn_data >= so->so_rcv.sb_lowat); } if ((hint & NOTE_SUBMIT) == 0) sounlock(so, s); return rv; } void filt_sowdetach(struct knote *kn) { struct socket *so = kn->kn_fp->f_data; KERNEL_ASSERT_LOCKED(); klist_remove_locked(&so->so_snd.sb_sel.si_note, kn); } int filt_sowrite(struct knote *kn, long hint) { struct socket *so = kn->kn_fp->f_data; int s, rv; if ((hint & NOTE_SUBMIT) == 0) s = solock(so); kn->kn_data = sbspace(so, &so->so_snd); if (so->so_state & SS_CANTSENDMORE) { kn->kn_flags |= EV_EOF; if (kn->kn_flags & __EV_POLL) { if (so->so_state & SS_ISDISCONNECTED) kn->kn_flags |= __EV_HUP; } kn->kn_fflags = so->so_error; rv = 1; } else if (so->so_error) { /* temporary udp error */ rv = 1; } else if (((so->so_state & SS_ISCONNECTED) == 0) && (so->so_proto->pr_flags & PR_CONNREQUIRED)) { rv = 0; } else if (kn->kn_sfflags & NOTE_LOWAT) { rv = (kn->kn_data >= kn->kn_sdata); } else { rv = (kn->kn_data >= so->so_snd.sb_lowat); } if ((hint & NOTE_SUBMIT) == 0) sounlock(so, s); return (rv); } int filt_solisten(struct knote *kn, long hint) { struct socket *so = kn->kn_fp->f_data; int s; if ((hint & NOTE_SUBMIT) == 0) s = solock(so); kn->kn_data = so->so_qlen; if ((hint & NOTE_SUBMIT) == 0) sounlock(so, s); return (kn->kn_data != 0); } #ifdef DDB void sobuf_print(struct sockbuf *, int (*)(const char *, ...) __attribute__((__format__(__kprintf__,1,2)))); void sobuf_print(struct sockbuf *sb, int (*pr)(const char *, ...) 
__attribute__((__format__(__kprintf__,1,2)))) { (*pr)("\tsb_cc: %lu\n", sb->sb_cc); (*pr)("\tsb_datacc: %lu\n", sb->sb_datacc); (*pr)("\tsb_hiwat: %lu\n", sb->sb_hiwat); (*pr)("\tsb_wat: %lu\n", sb->sb_wat); (*pr)("\tsb_mbcnt: %lu\n", sb->sb_mbcnt); (*pr)("\tsb_mbmax: %lu\n", sb->sb_mbmax); (*pr)("\tsb_lowat: %ld\n", sb->sb_lowat); (*pr)("\tsb_mb: %p\n", sb->sb_mb); (*pr)("\tsb_mbtail: %p\n", sb->sb_mbtail); (*pr)("\tsb_lastrecord: %p\n", sb->sb_lastrecord); (*pr)("\tsb_sel: ...\n"); (*pr)("\tsb_flags: %i\n", sb->sb_flags); (*pr)("\tsb_timeo_nsecs: %llu\n", sb->sb_timeo_nsecs); } void so_print(void *v, int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2)))) { struct socket *so = v; (*pr)("socket %p\n", so); (*pr)("so_type: %i\n", so->so_type); (*pr)("so_options: 0x%04x\n", so->so_options); /* %b */ (*pr)("so_linger: %i\n", so->so_linger); (*pr)("so_state: 0x%04x\n", so->so_state); (*pr)("so_pcb: %p\n", so->so_pcb); (*pr)("so_proto: %p\n", so->so_proto); (*pr)("so_sigio: %p\n", so->so_sigio.sir_sigio); (*pr)("so_head: %p\n", so->so_head); (*pr)("so_onq: %p\n", so->so_onq); (*pr)("so_q0: @%p first: %p\n", &so->so_q0, TAILQ_FIRST(&so->so_q0)); (*pr)("so_q: @%p first: %p\n", &so->so_q, TAILQ_FIRST(&so->so_q)); (*pr)("so_eq: next: %p\n", TAILQ_NEXT(so, so_qe)); (*pr)("so_q0len: %i\n", so->so_q0len); (*pr)("so_qlen: %i\n", so->so_qlen); (*pr)("so_qlimit: %i\n", so->so_qlimit); (*pr)("so_timeo: %i\n", so->so_timeo); (*pr)("so_obmark: %lu\n", so->so_oobmark); (*pr)("so_sp: %p\n", so->so_sp); if (so->so_sp != NULL) { (*pr)("\tssp_socket: %p\n", so->so_sp->ssp_socket); (*pr)("\tssp_soback: %p\n", so->so_sp->ssp_soback); (*pr)("\tssp_len: %lld\n", (unsigned long long)so->so_sp->ssp_len); (*pr)("\tssp_max: %lld\n", (unsigned long long)so->so_sp->ssp_max); (*pr)("\tssp_idletv: %lld %ld\n", so->so_sp->ssp_idletv.tv_sec, so->so_sp->ssp_idletv.tv_usec); (*pr)("\tssp_idleto: %spending (@%i)\n", timeout_pending(&so->so_sp->ssp_idleto) ? "" : "not ", so->so_sp->ssp_idleto.to_time); } (*pr)("so_rcv:\n"); sobuf_print(&so->so_rcv, pr); (*pr)("so_snd:\n"); sobuf_print(&so->so_snd, pr); (*pr)("so_upcall: %p so_upcallarg: %p\n", so->so_upcall, so->so_upcallarg); (*pr)("so_euid: %d so_ruid: %d\n", so->so_euid, so->so_ruid); (*pr)("so_egid: %d so_rgid: %d\n", so->so_egid, so->so_rgid); (*pr)("so_cpid: %d\n", so->so_cpid); } #endif
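/*
 * Illustration (userland, not kernel code): the SO_SPLICE handling in
 * sosetopt()/sogetopt() above is driven from user space with setsockopt(2)
 * and getsockopt(2).  A minimal sketch, assuming struct splice from
 * <sys/socket.h> with the sp_fd/sp_max/sp_idle members used above:
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <string.h>

/* Splice all data arriving on "from" into "to"; both must be connected. */
int
splice_sockets(int from, int to)
{
	struct splice sp;

	memset(&sp, 0, sizeof(sp));
	sp.sp_fd = to;			/* drain socket */
	sp.sp_max = 0;			/* no byte limit */
	timerclear(&sp.sp_idle);	/* no idle timeout */
	return (setsockopt(from, SOL_SOCKET, SO_SPLICE, &sp, sizeof(sp)));
}

/* Tear the splice down again; a negative fd unsplices (see sosplice()). */
int
unsplice_socket(int from)
{
	int fd = -1;

	return (setsockopt(from, SOL_SOCKET, SO_SPLICE, &fd, sizeof(fd)));
}

/* Read back how many bytes have been spliced so far (ssp_len above). */
int
spliced_bytes(int from, off_t *lenp)
{
	socklen_t optlen = sizeof(*lenp);

	return (getsockopt(from, SOL_SOCKET, SO_SPLICE, lenp, &optlen));
}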
/* $OpenBSD: in6_cksum.c,v 1.18 2019/04/22 22:47:49 bluhm Exp $ */ /* $KAME: in6_cksum.c,v 1.10 2000/12/03 00:53:59 itojun Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Copyright (c) 1988, 1992, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93 */ #include <sys/param.h> #include <sys/mbuf.h> #include <sys/systm.h> #include <netinet/in.h> #include <netinet/ip6.h> /* * Checksum routine for Internet Protocol family headers (Portable Version). 
* * This routine is very heavily used in the network * code and should be modified for each CPU to be as fast as possible. */ #define ADDCARRY(x) (x > 65535 ? x -= 65535 : x) #define REDUCE {l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; ADDCARRY(sum);} /* * m MUST contain a continuous IP6 header. * off is a offset where TCP/UDP/ICMP6 header starts. * len is a total length of a transport segment. * (e.g. TCP header + TCP payload) */ int in6_cksum(struct mbuf *m, uint8_t nxt, uint32_t off, uint32_t len) { uint16_t *w; int sum = 0; int mlen = 0; int byte_swapped = 0; struct ip6_hdr *ip6; union { uint16_t phs[4]; struct { uint32_t ph_len; uint8_t ph_zero[3]; uint8_t ph_nxt; } ph __packed; } uph; union { uint8_t c[2]; uint16_t s; } s_util; union { uint16_t s[2]; uint32_t l; } l_util; /* sanity check */ if (m->m_pkthdr.len < off + len) { panic("%s: mbuf len (%d) < off+len (%d+%d)", __func__, m->m_pkthdr.len, off, len); } /* Skip pseudo-header if nxt == 0. */ if (nxt == 0) goto skip_phdr; bzero(&uph, sizeof(uph)); /* * First create IP6 pseudo header and calculate a summary. */ ip6 = mtod(m, struct ip6_hdr *); w = (uint16_t *)&ip6->ip6_src; uph.ph.ph_len = htonl(len); uph.ph.ph_nxt = nxt; /* IPv6 source address */ sum += w[0]; if (!IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) sum += w[1]; sum += w[2]; sum += w[3]; sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7]; /* IPv6 destination address */ sum += w[8]; if (!IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) sum += w[9]; sum += w[10]; sum += w[11]; sum += w[12]; sum += w[13]; sum += w[14]; sum += w[15]; /* Payload length and upper layer identifier */ sum += uph.phs[0]; sum += uph.phs[1]; sum += uph.phs[2]; sum += uph.phs[3]; skip_phdr: /* * Secondly calculate a summary of the first mbuf excluding offset. */ while (m != NULL && off > 0) { if (m->m_len <= off) off -= m->m_len; else break; m = m->m_next; } if (m == NULL) { if (off) panic("%s: out of header, off %u", __func__, off); goto end; } w = (uint16_t *)(mtod(m, uint8_t *) + off); mlen = m->m_len - off; if (len < mlen) mlen = len; len -= mlen; /* * Force to even boundary. */ if ((1 & (long) w) && (mlen > 0)) { REDUCE; sum <<= 8; s_util.c[0] = *(uint8_t *)w; w = (uint16_t *)((uint8_t *)w + 1); mlen--; byte_swapped = 1; } /* * Unroll the loop to make overhead from * branches &c small. */ while ((mlen -= 32) >= 0) { sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7]; sum += w[8]; sum += w[9]; sum += w[10]; sum += w[11]; sum += w[12]; sum += w[13]; sum += w[14]; sum += w[15]; w += 16; } mlen += 32; while ((mlen -= 8) >= 0) { sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; w += 4; } mlen += 8; if (mlen == 0 && byte_swapped == 0) goto next; REDUCE; while ((mlen -= 2) >= 0) { sum += *w++; } if (byte_swapped) { REDUCE; sum <<= 8; byte_swapped = 0; if (mlen == -1) { s_util.c[1] = *(uint8_t *)w; sum += s_util.s; mlen = 0; } else mlen = -1; } else if (mlen == -1) s_util.c[0] = *(uint8_t *)w; next: m = m->m_next; /* * Lastly calculate a summary of the rest of mbufs. */ for (;m && len; m = m->m_next) { if (m->m_len == 0) continue; w = mtod(m, uint16_t *); if (mlen == -1) { /* * The first byte of this mbuf is the continuation * of a word spanning between this mbuf and the * last mbuf. * * s_util.c[0] is already saved when scanning previous * mbuf. */ s_util.c[1] = *(uint8_t *)w; sum += s_util.s; w = (uint16_t *)((uint8_t *)w + 1); mlen = m->m_len - 1; len--; } else mlen = m->m_len; if (len < mlen) mlen = len; len -= mlen; /* * Force to even boundary. 
*/ if ((1 & (long) w) && (mlen > 0)) { REDUCE; sum <<= 8; s_util.c[0] = *(uint8_t *)w; w = (uint16_t *)((uint8_t *)w + 1); mlen--; byte_swapped = 1; } /* * Unroll the loop to make overhead from * branches &c small. */ while ((mlen -= 32) >= 0) { sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7]; sum += w[8]; sum += w[9]; sum += w[10]; sum += w[11]; sum += w[12]; sum += w[13]; sum += w[14]; sum += w[15]; w += 16; } mlen += 32; while ((mlen -= 8) >= 0) { sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; w += 4; } mlen += 8; if (mlen == 0 && byte_swapped == 0) continue; REDUCE; while ((mlen -= 2) >= 0) { sum += *w++; } if (byte_swapped) { REDUCE; sum <<= 8; byte_swapped = 0; if (mlen == -1) { s_util.c[1] = *(uint8_t *)w; sum += s_util.s; mlen = 0; } else mlen = -1; } else if (mlen == -1) s_util.c[0] = *(uint8_t *)w; } end: if (len) panic("%s: out of data, len %u", __func__, len); if (mlen == -1) { /* The last mbuf has odd # of bytes. Follow the standard (the odd byte may be shifted left by 8 bits or not as determined by endian-ness of the machine) */ s_util.c[1] = 0; sum += s_util.s; } REDUCE; return (~sum & 0xffff); }
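/*
 * Editorial sketch (not part of the original file): a minimal userland
 * illustration of the ones-complement checksum idea that in6_cksum() above
 * implements over mbuf chains.  The helper names cksum_fold() and
 * cksum_flat() are hypothetical and exist only for this example; the sketch
 * assumes a flat, 16-bit-aligned buffer given as a count of 16-bit words,
 * and therefore omits the IPv6 pseudo-header, mbuf walking, odd offsets and
 * the byte-swapping cases that the real routine has to handle.
 */
#include <stddef.h>
#include <stdint.h>

/*
 * Fold a 32-bit accumulator back into 16 bits, adding the carries back in.
 * This is the role played by the ADDCARRY/REDUCE macros above.
 */
static uint16_t
cksum_fold(uint32_t sum)
{
	while (sum > 0xffff)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}

/*
 * Sum the buffer as 16-bit words and return the ones complement of the
 * folded sum, the same value in6_cksum() returns as "~sum & 0xffff".
 * Usage, e.g.: uint16_t ck = cksum_flat(words, nbytes / 2);
 */
static uint16_t
cksum_flat(const uint16_t *w, size_t nwords)
{
	uint32_t sum = 0;

	while (nwords-- > 0)
		sum += *w++;
	return (uint16_t)~cksum_fold(sum);
}

/*
 * Design note: the kernel routine keeps "sum" in a plain int and folds it
 * with REDUCE before shifting it left by 8 in the odd-boundary and
 * byte-swapped cases, so that the shift cannot push carry bits out of range.
 */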
/*	$OpenBSD: ip6_mroute.c,v 1.126 2021/03/10 10:21:49 jsg Exp $	*/
/*	$NetBSD: ip6_mroute.c,v 1.59 2003/12/10 09:28:38 itojun Exp $	*/
/*	$KAME: ip6_mroute.c,v 1.45 2001/03/25 08:38:51 itojun Exp $	*/

/*
 * Copyright (C) 1998 WIDE Project.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the project nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*	BSDI ip_mroute.c,v 2.10 1996/11/14 00:29:52 jch Exp	*/

/*
 * Copyright (c) 1989 Stephen Deering
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California. All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Stephen Deering of Stanford University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_mroute.c 8.2 (Berkeley) 11/15/93 */ /* * IP multicast forwarding procedures * * Written by David Waitzman, BBN Labs, August 1988. * Modified by Steve Deering, Stanford, February 1989. * Modified by Mark J. Steiglitz, Stanford, May, 1991 * Modified by Van Jacobson, LBL, January 1993 * Modified by Ajit Thyagarajan, PARC, August 1993 * Modified by Bill Fenner, PARC, April 1994 * * MROUTING Revision: 3.5.1.2 */ #include <sys/param.h> #include <sys/malloc.h> #include <sys/systm.h> #include <sys/timeout.h> #include <sys/mbuf.h> #include <sys/socket.h> #include <sys/socketvar.h> #include <sys/protosw.h> #include <sys/kernel.h> #include <sys/ioctl.h> #include <sys/syslog.h> #include <sys/sysctl.h> #include <net/if.h> #include <net/if_var.h> #include <net/route.h> #include <netinet/in.h> #include <netinet6/in6_var.h> #include <netinet/ip.h> #include <netinet/ip6.h> #include <netinet/icmp6.h> #include <netinet6/ip6_var.h> #include <netinet6/ip6_mroute.h> #include <netinet/in_pcb.h> /* #define MCAST_DEBUG */ #ifdef MCAST_DEBUG int mcast6_debug = 1; #define DPRINTF(fmt, args...) \ do { \ if (mcast6_debug) \ printf("%s:%d " fmt "\n", \ __func__, __LINE__, ## args); \ } while (0) #else #define DPRINTF(fmt, args...) \ do { } while (0) #endif int ip6_mdq(struct mbuf *, struct ifnet *, struct rtentry *); void phyint_send6(struct ifnet *, struct ip6_hdr *, struct mbuf *); /* * Globals. All but ip6_mrouter, ip6_mrtproto and mrt6stat could be static, * except for netstat or debugging purposes. 
*/ struct socket *ip6_mrouter[RT_TABLEID_MAX + 1]; struct rttimer_queue *mrouter6q[RT_TABLEID_MAX + 1]; int ip6_mrouter_ver = 0; int ip6_mrtproto; /* for netstat only */ struct mrt6stat mrt6stat; #define NO_RTE_FOUND 0x1 #define RTE_FOUND 0x2 #define MCAST_EXPIRE_TIMEOUT 30 /* seconds */ /* * Macros to compute elapsed time efficiently * Borrowed from Van Jacobson's scheduling code */ #define TV_DELTA(a, b, delta) do { \ int xxs; \ \ delta = (a).tv_usec - (b).tv_usec; \ if ((xxs = (a).tv_sec - (b).tv_sec)) { \ switch (xxs) { \ case 2: \ delta += 1000000; \ /* FALLTHROUGH */ \ case 1: \ delta += 1000000; \ break; \ default: \ delta += (1000000 * xxs); \ } \ } \ } while (0) #define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \ (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec) int get_sg6_cnt(struct sioc_sg_req6 *, unsigned int); int get_mif6_cnt(struct sioc_mif_req6 *, unsigned int); int ip6_mrouter_init(struct socket *, int, int); int add_m6if(struct socket *, struct mif6ctl *); int del_m6if(struct socket *, mifi_t *); int add_m6fc(struct socket *, struct mf6cctl *); int del_m6fc(struct socket *, struct mf6cctl *); struct ifnet *mrt6_iflookupbymif(mifi_t, unsigned int); struct rtentry *mf6c_find(struct ifnet *, struct in6_addr *, struct in6_addr *, unsigned int); struct rtentry *mrt6_mcast_add(struct ifnet *, struct sockaddr *, struct sockaddr *); void mrt6_mcast_del(struct rtentry *, unsigned int); void mf6c_expire_route(struct rtentry *, struct rttimer *); /* * Handle MRT setsockopt commands to modify the multicast routing tables. */ int ip6_mrouter_set(int cmd, struct socket *so, struct mbuf *m) { struct inpcb *inp = sotoinpcb(so); if (cmd != MRT6_INIT && so != ip6_mrouter[inp->inp_rtableid]) return (EPERM); switch (cmd) { case MRT6_INIT: if (m == NULL || m->m_len < sizeof(int)) return (EINVAL); return (ip6_mrouter_init(so, *mtod(m, int *), cmd)); case MRT6_DONE: return (ip6_mrouter_done(so)); case MRT6_ADD_MIF: if (m == NULL || m->m_len < sizeof(struct mif6ctl)) return (EINVAL); return (add_m6if(so, mtod(m, struct mif6ctl *))); case MRT6_DEL_MIF: if (m == NULL || m->m_len < sizeof(mifi_t)) return (EINVAL); return (del_m6if(so, mtod(m, mifi_t *))); case MRT6_ADD_MFC: if (m == NULL || m->m_len < sizeof(struct mf6cctl)) return (EINVAL); return (add_m6fc(so, mtod(m, struct mf6cctl *))); case MRT6_DEL_MFC: if (m == NULL || m->m_len < sizeof(struct mf6cctl)) return (EINVAL); return (del_m6fc(so, mtod(m, struct mf6cctl *))); default: return (EOPNOTSUPP); } } /* * Handle MRT getsockopt commands */ int ip6_mrouter_get(int cmd, struct socket *so, struct mbuf *m) { struct inpcb *inp = sotoinpcb(so); if (so != ip6_mrouter[inp->inp_rtableid]) return (EPERM); switch (cmd) { default: return EOPNOTSUPP; } } /* * Handle ioctl commands to obtain information from the cache */ int mrt6_ioctl(struct socket *so, u_long cmd, caddr_t data) { struct inpcb *inp = sotoinpcb(so); int error; if (inp == NULL) return (ENOTCONN); switch (cmd) { case SIOCGETSGCNT_IN6: NET_RLOCK_IN_IOCTL(); error = get_sg6_cnt((struct sioc_sg_req6 *)data, inp->inp_rtableid); NET_RUNLOCK_IN_IOCTL(); break; case SIOCGETMIFCNT_IN6: NET_RLOCK_IN_IOCTL(); error = get_mif6_cnt((struct sioc_mif_req6 *)data, inp->inp_rtableid); NET_RUNLOCK_IN_IOCTL(); break; default: error = ENOTTY; break; } return error; } /* * returns the packet, byte, rpf-failure count for the source group provided */ int get_sg6_cnt(struct sioc_sg_req6 *req, unsigned int rtableid) { struct rtentry *rt; struct mf6c *mf6c; rt = mf6c_find(NULL, &req->src.sin6_addr, 
&req->grp.sin6_addr, rtableid); if (rt == NULL) { req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff; return EADDRNOTAVAIL; } req->pktcnt = req->bytecnt = req->wrong_if = 0; do { mf6c = (struct mf6c *)rt->rt_llinfo; if (mf6c == NULL) continue; req->pktcnt += mf6c->mf6c_pkt_cnt; req->bytecnt += mf6c->mf6c_byte_cnt; req->wrong_if += mf6c->mf6c_wrong_if; } while ((rt = rtable_iterate(rt)) != NULL); return 0; } /* * returns the input and output packet and byte counts on the mif provided */ int get_mif6_cnt(struct sioc_mif_req6 *req, unsigned int rtableid) { struct ifnet *ifp; struct mif6 *m6; if ((ifp = mrt6_iflookupbymif(req->mifi, rtableid)) == NULL) return EINVAL; m6 = (struct mif6 *)ifp->if_mcast6; req->icount = m6->m6_pkt_in; req->ocount = m6->m6_pkt_out; req->ibytes = m6->m6_bytes_in; req->obytes = m6->m6_bytes_out; return 0; } int mrt6_sysctl_mif(void *oldp, size_t *oldlenp) { struct ifnet *ifp; caddr_t where = oldp; size_t needed, given; struct mif6 *mifp; struct mif6info minfo; given = *oldlenp; needed = 0; TAILQ_FOREACH(ifp, &ifnet, if_list) { if ((mifp = (struct mif6 *)ifp->if_mcast6) == NULL) continue; minfo.m6_mifi = mifp->m6_mifi; minfo.m6_flags = mifp->m6_flags; minfo.m6_lcl_addr = mifp->m6_lcl_addr; minfo.m6_ifindex = ifp->if_index; minfo.m6_pkt_in = mifp->m6_pkt_in; minfo.m6_pkt_out = mifp->m6_pkt_out; minfo.m6_bytes_in = mifp->m6_bytes_in; minfo.m6_bytes_out = mifp->m6_bytes_out; minfo.m6_rate_limit = mifp->m6_rate_limit; needed += sizeof(minfo); if (where && needed <= given) { int error; error = copyout(&minfo, where, sizeof(minfo)); if (error) return (error); where += sizeof(minfo); } } if (where) { *oldlenp = needed; if (given < needed) return (ENOMEM); } else *oldlenp = (11 * needed) / 10; return (0); } struct mf6csysctlarg { struct mf6cinfo *ms6a_minfos; size_t ms6a_len; size_t ms6a_needed; }; int mrt6_rtwalk_mf6csysctl(struct rtentry *rt, void *arg, unsigned int rtableid) { struct mf6c *mf6c = (struct mf6c *)rt->rt_llinfo; struct mf6csysctlarg *msa = arg; struct ifnet *ifp; struct mif6 *m6; struct mf6cinfo *minfo; int new = 0; /* Skip entries being removed. */ if (mf6c == NULL) return 0; /* Skip non-multicast routes. */ if (ISSET(rt->rt_flags, RTF_HOST | RTF_MULTICAST) != (RTF_HOST | RTF_MULTICAST)) return 0; /* User just asked for the output size. */ if (msa->ms6a_minfos == NULL) { msa->ms6a_needed += sizeof(*minfo); return 0; } /* Skip route with invalid interfaces. */ if ((ifp = if_get(rt->rt_ifidx)) == NULL) return 0; if ((m6 = (struct mif6 *)ifp->if_mcast6) == NULL) { if_put(ifp); return 0; } for (minfo = msa->ms6a_minfos; (uint8_t *)minfo < ((uint8_t *)msa->ms6a_minfos + msa->ms6a_len); minfo++) { /* Find a new entry or update old entry. 
*/ if (!IN6_ARE_ADDR_EQUAL(&minfo->mf6c_origin.sin6_addr, &satosin6(rt->rt_gateway)->sin6_addr) || !IN6_ARE_ADDR_EQUAL(&minfo->mf6c_mcastgrp.sin6_addr, &satosin6(rt_key(rt))->sin6_addr)) { if (!IN6_IS_ADDR_UNSPECIFIED( &minfo->mf6c_origin.sin6_addr) || !IN6_IS_ADDR_UNSPECIFIED( &minfo->mf6c_mcastgrp.sin6_addr)) continue; new = 1; } minfo->mf6c_origin = *satosin6(rt->rt_gateway); minfo->mf6c_mcastgrp = *satosin6(rt_key(rt)); minfo->mf6c_parent = mf6c->mf6c_parent; minfo->mf6c_pkt_cnt += mf6c->mf6c_pkt_cnt; minfo->mf6c_byte_cnt += mf6c->mf6c_byte_cnt; IF_SET(m6->m6_mifi, &minfo->mf6c_ifset); break; } if (new != 0) msa->ms6a_needed += sizeof(*minfo); if_put(ifp); return 0; } int mrt6_sysctl_mfc(void *oldp, size_t *oldlenp) { unsigned int rtableid; int error; struct mf6csysctlarg msa; if (oldp != NULL && *oldlenp > MAXPHYS) return EINVAL; if (oldp != NULL) msa.ms6a_minfos = malloc(*oldlenp, M_TEMP, M_WAITOK | M_ZERO); else msa.ms6a_minfos = NULL; msa.ms6a_len = *oldlenp; msa.ms6a_needed = 0; for (rtableid = 0; rtableid <= RT_TABLEID_MAX; rtableid++) { rtable_walk(rtableid, AF_INET6, NULL, mrt6_rtwalk_mf6csysctl, &msa); } if (msa.ms6a_minfos != NULL && msa.ms6a_needed > 0 && (error = copyout(msa.ms6a_minfos, oldp, msa.ms6a_needed)) != 0) { free(msa.ms6a_minfos, M_TEMP, *oldlenp); return error; } free(msa.ms6a_minfos, M_TEMP, *oldlenp); *oldlenp = msa.ms6a_needed; return 0; } /* * Enable multicast routing */ int ip6_mrouter_init(struct socket *so, int v, int cmd) { struct inpcb *inp = sotoinpcb(so); unsigned int rtableid = inp->inp_rtableid; if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_ICMPV6) return (EOPNOTSUPP); if (v != 1) return (ENOPROTOOPT); if (ip6_mrouter[rtableid] != NULL) return (EADDRINUSE); ip6_mrouter[rtableid] = so; ip6_mrouter_ver = cmd; mrouter6q[rtableid] = rt_timer_queue_create(MCAST_EXPIRE_TIMEOUT); return (0); } int mrouter6_rtwalk_delete(struct rtentry *rt, void *arg, unsigned int rtableid) { /* Skip non-multicast routes. */ if (ISSET(rt->rt_flags, RTF_HOST | RTF_MULTICAST) != (RTF_HOST | RTF_MULTICAST)) return 0; return EEXIST; } /* * Disable multicast routing */ int ip6_mrouter_done(struct socket *so) { struct inpcb *inp = sotoinpcb(so); struct ifnet *ifp; unsigned int rtableid = inp->inp_rtableid; int error; NET_ASSERT_LOCKED(); /* Delete all remaining installed multicast routes. */ do { struct rtentry *rt = NULL; error = rtable_walk(rtableid, AF_INET6, &rt, mrouter6_rtwalk_delete, NULL); if (rt != NULL && error == EEXIST) { mrt6_mcast_del(rt, rtableid); error = EAGAIN; } rtfree(rt); } while (error == EAGAIN); /* Unregister all interfaces in the domain. 
*/ TAILQ_FOREACH(ifp, &ifnet, if_list) { if (ifp->if_rdomain != rtableid) continue; ip6_mrouter_detach(ifp); } rt_timer_queue_destroy(mrouter6q[rtableid]); ip6_mrouter[inp->inp_rtableid] = NULL; ip6_mrouter_ver = 0; mrouter6q[rtableid] = NULL; return 0; } void ip6_mrouter_detach(struct ifnet *ifp) { struct mif6 *m6 = (struct mif6 *)ifp->if_mcast6; struct in6_ifreq ifr; if (m6 == NULL) return; ifp->if_mcast6 = NULL; memset(&ifr, 0, sizeof(ifr)); ifr.ifr_addr.sin6_family = AF_INET6; ifr.ifr_addr.sin6_addr = in6addr_any; KERNEL_LOCK(); (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr); KERNEL_UNLOCK(); free(m6, M_MRTABLE, sizeof(*m6)); } /* * Add a mif to the mif table */ int add_m6if(struct socket *so, struct mif6ctl *mifcp) { struct inpcb *inp = sotoinpcb(so); struct mif6 *mifp; struct ifnet *ifp; struct in6_ifreq ifr; int error; unsigned int rtableid = inp->inp_rtableid; NET_ASSERT_LOCKED(); if (mifcp->mif6c_mifi >= MAXMIFS) return EINVAL; if (mrt6_iflookupbymif(mifcp->mif6c_mifi, rtableid) != NULL) return EADDRINUSE; /* XXX: is it appropriate? */ { ifp = if_get(mifcp->mif6c_pifi); if (ifp == NULL) return ENXIO; /* Make sure the interface supports multicast */ if ((ifp->if_flags & IFF_MULTICAST) == 0) { if_put(ifp); return EOPNOTSUPP; } /* * Enable promiscuous reception of all IPv6 multicasts * from the interface. */ memset(&ifr, 0, sizeof(ifr)); ifr.ifr_addr.sin6_family = AF_INET6; ifr.ifr_addr.sin6_addr = in6addr_any; error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)&ifr); if (error) { if_put(ifp); return error; } } mifp = malloc(sizeof(*mifp), M_MRTABLE, M_WAITOK | M_ZERO); ifp->if_mcast6 = (caddr_t)mifp; mifp->m6_mifi = mifcp->mif6c_mifi; mifp->m6_flags = mifcp->mif6c_flags; #ifdef notyet /* scaling up here allows division by 1024 in critical code */ mifp->m6_rate_limit = mifcp->mif6c_rate_limit * 1024 / 1000; #endif if_put(ifp); return 0; } /* * Delete a mif from the mif table */ int del_m6if(struct socket *so, mifi_t *mifip) { struct inpcb *inp = sotoinpcb(so); struct ifnet *ifp; NET_ASSERT_LOCKED(); if (*mifip >= MAXMIFS) return EINVAL; if ((ifp = mrt6_iflookupbymif(*mifip, inp->inp_rtableid)) == NULL) return EINVAL; ip6_mrouter_detach(ifp); return 0; } int mf6c_add_route(struct ifnet *ifp, struct sockaddr *origin, struct sockaddr *group, struct mf6cctl *mf6cc, int wait) { struct rtentry *rt; struct mf6c *mf6c; unsigned int rtableid = ifp->if_rdomain; #ifdef MCAST_DEBUG char bsrc[INET6_ADDRSTRLEN], bdst[INET6_ADDRSTRLEN]; #endif /* MCAST_DEBUG */ rt = mrt6_mcast_add(ifp, origin, group); if (rt == NULL) return ENOENT; mf6c = malloc(sizeof(*mf6c), M_MRTABLE, wait | M_ZERO); if (mf6c == NULL) { DPRINTF("origin %s group %s parent %d (%s) malloc failed", inet_ntop(AF_INET6, origin, bsrc, sizeof(bsrc)), inet_ntop(AF_INET6, group, bdst, sizeof(bdst)), mf6cc->mf6cc_parent, ifp->if_xname); mrt6_mcast_del(rt, rtableid); rtfree(rt); return ENOMEM; } rt->rt_llinfo = (caddr_t)mf6c; rt_timer_add(rt, mf6c_expire_route, mrouter6q[rtableid], rtableid); mf6c->mf6c_parent = mf6cc->mf6cc_parent; rtfree(rt); return 0; } void mf6c_update(struct mf6cctl *mf6cc, int wait, unsigned int rtableid) { struct rtentry *rt; struct mf6c *mf6c; struct ifnet *ifp; struct sockaddr_in6 osin6, gsin6; mifi_t mifi; #ifdef MCAST_DEBUG char bdst[INET6_ADDRSTRLEN]; #endif /* MCAST_DEBUG */ memset(&osin6, 0, sizeof(osin6)); osin6.sin6_family = AF_INET6; osin6.sin6_len = sizeof(osin6); osin6.sin6_addr = mf6cc->mf6cc_origin.sin6_addr; memset(&gsin6, 0, sizeof(gsin6)); gsin6.sin6_family = AF_INET6; gsin6.sin6_len = 
sizeof(gsin6); gsin6.sin6_addr = mf6cc->mf6cc_mcastgrp.sin6_addr; for (mifi = 0; mifi < MAXMIFS; mifi++) { if (mifi == mf6cc->mf6cc_parent) continue; /* Test for mif existence and then update the entry. */ if ((ifp = mrt6_iflookupbymif(mifi, rtableid)) == NULL) continue; rt = mf6c_find(ifp, &mf6cc->mf6cc_origin.sin6_addr, &mf6cc->mf6cc_mcastgrp.sin6_addr, rtableid); /* mif not configured or removed. */ if (!IF_ISSET(mifi, &mf6cc->mf6cc_ifset)) { /* Route doesn't exist, nothing to do. */ if (rt == NULL) continue; DPRINTF("del route (group %s) for mif %d (%s)", inet_ntop(AF_INET6, &mf6cc->mf6cc_mcastgrp.sin6_addr, bdst, sizeof(bdst)), mifi, ifp->if_xname); mrt6_mcast_del(rt, rtableid); rtfree(rt); continue; } /* Route exists, look for changes. */ if (rt != NULL) { mf6c = (struct mf6c *)rt->rt_llinfo; /* Skip route being deleted. */ if (mf6c == NULL) { rtfree(rt); continue; } /* No new changes to apply. */ if (mf6cc->mf6cc_parent == mf6c->mf6c_parent) { rtfree(rt); continue; } DPRINTF("update route (group %s) for mif %d (%s)", inet_ntop(AF_INET6, &mf6cc->mf6cc_mcastgrp.sin6_addr, bdst, sizeof(bdst)), mifi, ifp->if_xname); mf6c->mf6c_parent = mf6cc->mf6cc_parent; rtfree(rt); continue; } DPRINTF("add route (group %s) for mif %d (%s)", inet_ntop(AF_INET6, &mf6cc->mf6cc_mcastgrp.sin6_addr, bdst, sizeof(bdst)), mifi, ifp->if_xname); mf6c_add_route(ifp, sin6tosa(&osin6), sin6tosa(&gsin6), mf6cc, wait); } /* Create route for the parent interface. */ if ((ifp = mrt6_iflookupbymif(mf6cc->mf6cc_parent, rtableid)) == NULL) { DPRINTF("failed to find upstream interface %d", mf6cc->mf6cc_parent); return; } /* We already have a route, nothing to do here. */ if ((rt = mf6c_find(ifp, &mf6cc->mf6cc_origin.sin6_addr, &mf6cc->mf6cc_mcastgrp.sin6_addr, rtableid)) != NULL) { rtfree(rt); return; } DPRINTF("add upstream route (group %s) for if %s", inet_ntop(AF_INET6, &mf6cc->mf6cc_mcastgrp.sin6_addr, bdst, sizeof(bdst)), ifp->if_xname); mf6c_add_route(ifp, sin6tosa(&osin6), sin6tosa(&gsin6), mf6cc, wait); } int mf6c_add(struct mf6cctl *mfccp, struct in6_addr *origin, struct in6_addr *group, int vidx, unsigned int rtableid, int wait) { struct ifnet *ifp; struct mif6 *m6; struct mf6cctl mf6cc; ifp = mrt6_iflookupbymif(vidx, rtableid); if (ifp == NULL || (m6 = (struct mif6 *)ifp->if_mcast6) == NULL) return ENOENT; memset(&mf6cc, 0, sizeof(mf6cc)); if (mfccp == NULL) { mf6cc.mf6cc_origin.sin6_family = AF_INET6; mf6cc.mf6cc_origin.sin6_len = sizeof(mf6cc.mf6cc_origin); mf6cc.mf6cc_origin.sin6_addr = *origin; mf6cc.mf6cc_mcastgrp.sin6_family = AF_INET6; mf6cc.mf6cc_mcastgrp.sin6_len = sizeof(mf6cc.mf6cc_mcastgrp); mf6cc.mf6cc_mcastgrp.sin6_addr = *group; mf6cc.mf6cc_parent = vidx; } else memcpy(&mf6cc, mfccp, sizeof(mf6cc)); mf6c_update(&mf6cc, wait, rtableid); return 0; } int add_m6fc(struct socket *so, struct mf6cctl *mfccp) { struct inpcb *inp = sotoinpcb(so); unsigned int rtableid = inp->inp_rtableid; NET_ASSERT_LOCKED(); return mf6c_add(mfccp, &mfccp->mf6cc_origin.sin6_addr, &mfccp->mf6cc_mcastgrp.sin6_addr, mfccp->mf6cc_parent, rtableid, M_WAITOK); } int del_m6fc(struct socket *so, struct mf6cctl *mfccp) { struct inpcb *inp = sotoinpcb(so); struct rtentry *rt; unsigned int rtableid = inp->inp_rtableid; NET_ASSERT_LOCKED(); while ((rt = mf6c_find(NULL, &mfccp->mf6cc_origin.sin6_addr, &mfccp->mf6cc_mcastgrp.sin6_addr, rtableid)) != NULL) { mrt6_mcast_del(rt, rtableid); rtfree(rt); } return 0; } int socket6_send(struct socket *s, struct mbuf *mm, struct sockaddr_in6 *src) { if (s) { if (sbappendaddr(s, &s->so_rcv, 
sin6tosa(src), mm, NULL) != 0) { sorwakeup(s); return 0; } } m_freem(mm); return -1; } /* * IPv6 multicast forwarding function. This function assumes that the packet * pointed to by "ip6" has arrived on (or is about to be sent to) the interface * pointed to by "ifp", and the packet is to be relayed to other networks * that have members of the packet's destination IPv6 multicast group. * * The packet is returned unscathed to the caller, unless it is * erroneous, in which case a non-zero return value tells the caller to * discard it. */ int ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m) { struct rtentry *rt; struct mif6 *mifp; struct mbuf *mm; struct sockaddr_in6 sin6; unsigned int rtableid = ifp->if_rdomain; NET_ASSERT_LOCKED(); /* * Don't forward a packet with Hop limit of zero or one, * or a packet destined to a local-only group. */ if (ip6->ip6_hlim <= 1 || IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst) || IN6_IS_ADDR_MC_LINKLOCAL(&ip6->ip6_dst)) return 0; ip6->ip6_hlim--; /* * Source address check: do not forward packets with unspecified * source. It was discussed in July 2000, on ipngwg mailing list. * This is rather more serious than unicast cases, because some * MLD packets can be sent with the unspecified source address * (although such packets must normally set 1 to the hop limit field). */ if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) { ip6stat_inc(ip6s_cantforward); if (ip6_log_time + ip6_log_interval < getuptime()) { char src[INET6_ADDRSTRLEN], dst[INET6_ADDRSTRLEN]; ip6_log_time = getuptime(); inet_ntop(AF_INET6, &ip6->ip6_src, src, sizeof(src)); inet_ntop(AF_INET6, &ip6->ip6_dst, dst, sizeof(dst)); log(LOG_DEBUG, "cannot forward " "from %s to %s nxt %d received on interface %u\n", src, dst, ip6->ip6_nxt, m->m_pkthdr.ph_ifidx); } return 0; } /* * Determine forwarding mifs from the forwarding cache table */ rt = mf6c_find(NULL, &ip6->ip6_src, &ip6->ip6_dst, rtableid); /* Entry exists, so forward if necessary */ if (rt) { return (ip6_mdq(m, ifp, rt)); } else { /* * If we don't have a route for packet's origin, * Make a copy of the packet & * send message to routing daemon */ mrt6stat.mrt6s_no_route++; { struct mrt6msg *im; if ((mifp = (struct mif6 *)ifp->if_mcast6) == NULL) return EHOSTUNREACH; /* * Make a copy of the header to send to the user * level process */ mm = m_copym(m, 0, sizeof(struct ip6_hdr), M_NOWAIT); if (mm == NULL) return ENOBUFS; /* * Send message to routing daemon */ (void)memset(&sin6, 0, sizeof(sin6)); sin6.sin6_len = sizeof(sin6); sin6.sin6_family = AF_INET6; sin6.sin6_addr = ip6->ip6_src; im = NULL; switch (ip6_mrouter_ver) { case MRT6_INIT: im = mtod(mm, struct mrt6msg *); im->im6_msgtype = MRT6MSG_NOCACHE; im->im6_mbz = 0; im->im6_mif = mifp->m6_mifi; break; default: m_freem(mm); return EINVAL; } if (socket6_send(ip6_mrouter[rtableid], mm, &sin6) < 0) { log(LOG_WARNING, "ip6_mforward: ip6_mrouter " "socket queue full\n"); mrt6stat.mrt6s_upq_sockfull++; return ENOBUFS; } mrt6stat.mrt6s_upcalls++; mf6c_add(NULL, &ip6->ip6_src, &ip6->ip6_dst, mifp->m6_mifi, rtableid, M_NOWAIT); } return 0; } } void mf6c_expire_route(struct rtentry *rt, struct rttimer *rtt) { struct mf6c *mf6c = (struct mf6c *)rt->rt_llinfo; unsigned int rtableid = rtt->rtt_tableid; #ifdef MCAST_DEBUG char bsrc[INET6_ADDRSTRLEN], bdst[INET6_ADDRSTRLEN]; #endif /* MCAST_DEBUG */ /* Skip entry being deleted. 
*/ if (mf6c == NULL) return; DPRINTF("origin %s group %s interface %d expire %s", inet_ntop(AF_INET6, &satosin6(rt->rt_gateway)->sin6_addr, bsrc, sizeof(bsrc)), inet_ntop(AF_INET6, &satosin6(rt_key(rt))->sin6_addr, bdst, sizeof(bdst)), rt->rt_ifidx, mf6c->mf6c_expire ? "yes" : "no"); if (mf6c->mf6c_expire == 0) { mf6c->mf6c_expire = 1; rt_timer_add(rt, mf6c_expire_route, mrouter6q[rtableid], rtableid); return; } mrt6_mcast_del(rt, rtableid); } /* * Packet forwarding routine once entry in the cache is made */ int ip6_mdq(struct mbuf *m, struct ifnet *ifp, struct rtentry *rt) { struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct mif6 *m6, *mifp = (struct mif6 *)ifp->if_mcast6; struct mf6c *mf6c = (struct mf6c *)rt->rt_llinfo; struct ifnet *ifn; int plen = m->m_pkthdr.len; if (mifp == NULL || mf6c == NULL) { rtfree(rt); return EHOSTUNREACH; } /* * Don't forward if it didn't arrive from the parent mif * for its origin. */ if (mifp->m6_mifi != mf6c->mf6c_parent) { /* came in the wrong interface */ mrt6stat.mrt6s_wrong_if++; mf6c->mf6c_wrong_if++; rtfree(rt); return 0; } /* if wrong iif */ /* If I sourced this packet, it counts as output, else it was input. */ if (m->m_pkthdr.ph_ifidx == 0) { /* XXX: is ph_ifidx really 0 when output?? */ mifp->m6_pkt_out++; mifp->m6_bytes_out += plen; } else { mifp->m6_pkt_in++; mifp->m6_bytes_in += plen; } /* * For each mif, forward a copy of the packet if there are group * members downstream on the interface. */ do { /* Don't consider non multicast routes. */ if (ISSET(rt->rt_flags, RTF_HOST | RTF_MULTICAST) != (RTF_HOST | RTF_MULTICAST)) continue; mf6c = (struct mf6c *)rt->rt_llinfo; if (mf6c == NULL) continue; mf6c->mf6c_pkt_cnt++; mf6c->mf6c_byte_cnt += m->m_pkthdr.len; /* Don't let this route expire. */ mf6c->mf6c_expire = 0; if ((ifn = if_get(rt->rt_ifidx)) == NULL) continue; /* Sanity check: did we configure this? */ if ((m6 = (struct mif6 *)ifn->if_mcast6) == NULL) { if_put(ifn); continue; } /* Don't send in the upstream interface. */ if (mf6c->mf6c_parent == m6->m6_mifi) { if_put(ifn); continue; } /* * check if the outgoing packet is going to break * a scope boundary. */ if ((mifp->m6_flags & MIFF_REGISTER) == 0 && (m6->m6_flags & MIFF_REGISTER) == 0 && (in6_addr2scopeid(ifp->if_index, &ip6->ip6_dst) != in6_addr2scopeid(ifn->if_index, &ip6->ip6_dst) || in6_addr2scopeid(ifp->if_index, &ip6->ip6_src) != in6_addr2scopeid(ifn->if_index, &ip6->ip6_src))) { if_put(ifn); ip6stat_inc(ip6s_badscope); continue; } m6->m6_pkt_out++; m6->m6_bytes_out += plen; phyint_send6(ifn, ip6, m); if_put(ifn); } while ((rt = rtable_iterate(rt)) != NULL); return 0; } void phyint_send6(struct ifnet *ifp, struct ip6_hdr *ip6, struct mbuf *m) { struct mbuf *mb_copy; struct sockaddr_in6 *dst6, sin6; int error = 0; NET_ASSERT_LOCKED(); /* * Make a new reference to the packet; make sure that * the IPv6 header is actually copied, not just referenced, * so that ip6_output() only scribbles on the copy. */ mb_copy = m_dup_pkt(m, max_linkhdr, M_NOWAIT); if (mb_copy == NULL) return; /* set MCAST flag to the outgoing packet */ mb_copy->m_flags |= M_MCAST; /* * If we sourced the packet, call ip6_output since we may divide * the packet into fragments when the packet is too big for the * outgoing interface. * Otherwise, we can simply send the packet to the interface * sending queue. 
*/ if (m->m_pkthdr.ph_ifidx == 0) { struct ip6_moptions im6o; im6o.im6o_ifidx = ifp->if_index; /* XXX: ip6_output will override ip6->ip6_hlim */ im6o.im6o_hlim = ip6->ip6_hlim; im6o.im6o_loop = 1; error = ip6_output(mb_copy, NULL, NULL, IPV6_FORWARDING, &im6o, NULL); return; } /* * If we belong to the destination multicast group * on the outgoing interface, loop back a copy. */ dst6 = &sin6; memset(&sin6, 0, sizeof(sin6)); if (in6_hasmulti(&ip6->ip6_dst, ifp)) { dst6->sin6_len = sizeof(struct sockaddr_in6); dst6->sin6_family = AF_INET6; dst6->sin6_addr = ip6->ip6_dst; ip6_mloopback(ifp, m, dst6); } /* * Put the packet into the sending queue of the outgoing interface * if it would fit in the MTU of the interface. */ if (mb_copy->m_pkthdr.len <= ifp->if_m