/*
       *  linux/fs/proc/inode.c
       *
       *  Copyright (C) 1991, 1992  Linus Torvalds
       */
      
      #include <linux/time.h>
      #include <linux/proc_fs.h>
      #include <linux/kernel.h>
      #include <linux/pid_namespace.h>
      #include <linux/mm.h>
      #include <linux/string.h>
      #include <linux/stat.h>
      #include <linux/completion.h>
      #include <linux/poll.h>
      #include <linux/printk.h>
      #include <linux/file.h>
      #include <linux/limits.h>
      #include <linux/init.h>
      #include <linux/module.h>
      #include <linux/sysctl.h>
      #include <linux/seq_file.h>
      #include <linux/slab.h>
      #include <linux/mount.h>
      #include <linux/magic.h>
      
      #include <asm/uaccess.h>
      
      #include "internal.h"
      
      static void proc_evict_inode(struct inode *inode)
      {
              struct proc_dir_entry *de;
              struct ctl_table_header *head;
      
	truncate_inode_pages_final(&inode->i_data);
	clear_inode(inode);

	/* Stop tracking associated processes */
	put_pid(PROC_I(inode)->pid);

	/* Let go of any associated proc directory entry */
	de = PDE(inode);
	if (de)
		pde_put(de);
	head = PROC_I(inode)->sysctl;
	if (head) {
		RCU_INIT_POINTER(PROC_I(inode)->sysctl, NULL);
		sysctl_head_put(head);
	}
}
      
      static struct kmem_cache * proc_inode_cachep;
      
      static struct inode *proc_alloc_inode(struct super_block *sb)
      {
              struct proc_inode *ei;
              struct inode *inode;
      
	ei = (struct proc_inode *)kmem_cache_alloc(proc_inode_cachep, GFP_KERNEL);
	if (!ei)
		return NULL;
	ei->pid = NULL;
	ei->fd = 0;
	ei->op.proc_get_link = NULL;
	ei->pde = NULL;
	ei->sysctl = NULL;
	ei->sysctl_entry = NULL;
	ei->ns_ops = NULL;
	inode = &ei->vfs_inode;
	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
	return inode;
      }
      
      static void proc_i_callback(struct rcu_head *head)
      {
              struct inode *inode = container_of(head, struct inode, i_rcu);
              kmem_cache_free(proc_inode_cachep, PROC_I(inode));
      }
      
      static void proc_destroy_inode(struct inode *inode)
      {
	call_rcu(&inode->i_rcu, proc_i_callback);
      }
      
      static void init_once(void *foo)
      {
              struct proc_inode *ei = (struct proc_inode *) foo;
      
	inode_init_once(&ei->vfs_inode);
      }
      
      void __init proc_init_inodecache(void)
      {
              proc_inode_cachep = kmem_cache_create("proc_inode_cache",
                                                   sizeof(struct proc_inode),
                                                   0, (SLAB_RECLAIM_ACCOUNT|
                                                      SLAB_MEM_SPREAD|SLAB_PANIC),
                                                   init_once);
      }
      
      static int proc_show_options(struct seq_file *seq, struct dentry *root)
      {
	struct super_block *sb = root->d_sb;
	struct pid_namespace *pid = sb->s_fs_info;

	if (!gid_eq(pid->pid_gid, GLOBAL_ROOT_GID))
		seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, pid->pid_gid));
	if (pid->hide_pid != 0)
		seq_printf(seq, ",hidepid=%u", pid->hide_pid);

	return 0;
      }
      
      static const struct super_operations proc_sops = {
              .alloc_inode        = proc_alloc_inode,
              .destroy_inode        = proc_destroy_inode,
              .drop_inode        = generic_delete_inode,
              .evict_inode        = proc_evict_inode,
              .statfs                = simple_statfs,
              .remount_fs        = proc_remount,
              .show_options        = proc_show_options,
      };
      
      enum {BIAS = -1U<<31};
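
/*
 * Pinning scheme for proc entries: every caller into a ->proc_fops
 * method pins the entry with use_pde() and unpins it with unuse_pde().
 * proc_entry_rundown() adds BIAS (the sign bit) to ->in_use, which
 * makes the count negative so use_pde() refuses new callers; once the
 * count drains back to exactly BIAS, the last in-flight user fires the
 * completion and the entry can be torn down.
 */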
      
      static inline int use_pde(struct proc_dir_entry *pde)
      {
	return atomic_inc_unless_negative(&pde->in_use);
      }
      
      static void unuse_pde(struct proc_dir_entry *pde)
      {
	if (atomic_dec_return(&pde->in_use) == BIAS)
		complete(pde->pde_unload_completion);
}
      
/*
 * Called with pde->pde_unload_lock held; may drop and re-acquire it.
 * Only one caller actually runs ->release(); a concurrent caller for
 * the same opener just waits for that release to finish.
 */
      static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo)
      {
	if (pdeo->closing) {
		/* somebody else is doing that, just wait */
		DECLARE_COMPLETION_ONSTACK(c);
		pdeo->c = &c;
		spin_unlock(&pde->pde_unload_lock);
		wait_for_completion(&c);
		spin_lock(&pde->pde_unload_lock);
	} else {
		struct file *file;
		pdeo->closing = 1;
		spin_unlock(&pde->pde_unload_lock);
		file = pdeo->file;
		pde->proc_fops->release(file_inode(file), file);
		spin_lock(&pde->pde_unload_lock);
		list_del_init(&pdeo->lh);
		if (pdeo->c)
			complete(pdeo->c);
		kfree(pdeo);
	}
}
      
      void proc_entry_rundown(struct proc_dir_entry *de)
      {
	DECLARE_COMPLETION_ONSTACK(c);
	/* Wait until all existing callers into module are done. */
	de->pde_unload_completion = &c;
	if (atomic_add_return(BIAS, &de->in_use) != BIAS)
		wait_for_completion(&c);

	spin_lock(&de->pde_unload_lock);
	while (!list_empty(&de->pde_openers)) {
		struct pde_opener *pdeo;
		pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh);
		close_pdeo(de, pdeo);
	}
	spin_unlock(&de->pde_unload_lock);
      }
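
/*
 * Each proc_reg_* wrapper below follows the same pattern: pin the
 * entry with use_pde() so the backing module cannot be unloaded
 * mid-call, fetch the real operation from ->proc_fops, invoke it (or a
 * sensible default), and drop the pin with unuse_pde().
 */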
      
      static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence)
      {
	struct proc_dir_entry *pde = PDE(file_inode(file));
	loff_t rv = -EINVAL;
	if (use_pde(pde)) {
		loff_t (*llseek)(struct file *, loff_t, int);
		llseek = pde->proc_fops->llseek;
		if (!llseek)
			llseek = default_llseek;
		rv = llseek(file, offset, whence);
		unuse_pde(pde);
	}
	return rv;
      }
      
      static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
      {
              ssize_t (*read)(struct file *, char __user *, size_t, loff_t *);
	struct proc_dir_entry *pde = PDE(file_inode(file));
	ssize_t rv = -EIO;
	if (use_pde(pde)) {
		read = pde->proc_fops->read;
		if (read)
			rv = read(file, buf, count, ppos);
		unuse_pde(pde);
	}
	return rv;
      }
      
      static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
      {
              ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *);
	struct proc_dir_entry *pde = PDE(file_inode(file));
	ssize_t rv = -EIO;
	if (use_pde(pde)) {
		write = pde->proc_fops->write;
		if (write)
			rv = write(file, buf, count, ppos);
		unuse_pde(pde);
	}
	return rv;
      }
      
      static unsigned int proc_reg_poll(struct file *file, struct poll_table_struct *pts)
      {
	struct proc_dir_entry *pde = PDE(file_inode(file));
	unsigned int rv = DEFAULT_POLLMASK;
	unsigned int (*poll)(struct file *, struct poll_table_struct *);
	if (use_pde(pde)) {
		poll = pde->proc_fops->poll;
		if (poll)
			rv = poll(file, pts);
		unuse_pde(pde);
	}
	return rv;
      }
      
      static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
      {
	struct proc_dir_entry *pde = PDE(file_inode(file));
	long rv = -ENOTTY;
	long (*ioctl)(struct file *, unsigned int, unsigned long);
	if (use_pde(pde)) {
		ioctl = pde->proc_fops->unlocked_ioctl;
		if (ioctl)
			rv = ioctl(file, cmd, arg);
		unuse_pde(pde);
	}
	return rv;
      }
      
      #ifdef CONFIG_COMPAT
      static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
      {
              struct proc_dir_entry *pde = PDE(file_inode(file));
              long rv = -ENOTTY;
              long (*compat_ioctl)(struct file *, unsigned int, unsigned long);
              if (use_pde(pde)) {
                      compat_ioctl = pde->proc_fops->compat_ioctl;
                      if (compat_ioctl)
                              rv = compat_ioctl(file, cmd, arg);
                      unuse_pde(pde);
              }
              return rv;
      }
      #endif
      
      static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma)
      {
	struct proc_dir_entry *pde = PDE(file_inode(file));
	int rv = -EIO;
	int (*mmap)(struct file *, struct vm_area_struct *);
	if (use_pde(pde)) {
		mmap = pde->proc_fops->mmap;
		if (mmap)
			rv = mmap(file, vma);
		unuse_pde(pde);
	}
	return rv;
      }
      
      static unsigned long
      proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr,
                                 unsigned long len, unsigned long pgoff,
                                 unsigned long flags)
      {
	struct proc_dir_entry *pde = PDE(file_inode(file));
	unsigned long rv = -EIO;

	if (use_pde(pde)) {
		typeof(proc_reg_get_unmapped_area) *get_area;

		get_area = pde->proc_fops->get_unmapped_area;
#ifdef CONFIG_MMU
		if (!get_area)
			get_area = current->mm->get_unmapped_area;
#endif

		if (get_area)
			rv = get_area(file, orig_addr, len, pgoff, flags);
		else
			rv = orig_addr;
		unuse_pde(pde);
	}
	return rv;
      }
      
      static int proc_reg_open(struct inode *inode, struct file *file)
      {
	struct proc_dir_entry *pde = PDE(inode);
              int rv = 0;
              int (*open)(struct inode *, struct file *);
              int (*release)(struct inode *, struct file *);
              struct pde_opener *pdeo;
      
	/*
	 * What for, you ask? Well, we can have an open, rmmod,
	 * remove_proc_entry sequence. ->release won't be called because
	 * ->proc_fops will be cleared. Depending on the complexity of
	 * ->release, consequences vary.
	 *
	 * We can't wait for mercy when close will be done for real, it's
	 * deadlockable: rmmod foo </proc/foo . So, we're going to do ->release
	 * by hand in remove_proc_entry(). For this, save the opener's
	 * struct file for later.
	 */
              pdeo = kzalloc(sizeof(struct pde_opener), GFP_KERNEL);
	if (!pdeo)
		return -ENOMEM;

	if (!use_pde(pde)) {
		kfree(pdeo);
		return -ENOENT;
	}
	open = pde->proc_fops->open;
	release = pde->proc_fops->release;

	if (open)
		rv = open(inode, file);

	if (rv == 0 && release) {
		/* To know what to release. */
		pdeo->file = file;
		/* Strictly for "too late" ->release in proc_reg_release(). */
		spin_lock(&pde->pde_unload_lock);
		list_add(&pdeo->lh, &pde->pde_openers);
		spin_unlock(&pde->pde_unload_lock);
	} else
		kfree(pdeo);

	unuse_pde(pde);
	return rv;
}
      
      static int proc_reg_release(struct inode *inode, struct file *file)
      {
	struct proc_dir_entry *pde = PDE(inode);
	struct pde_opener *pdeo;
	spin_lock(&pde->pde_unload_lock);
	list_for_each_entry(pdeo, &pde->pde_openers, lh) {
		if (pdeo->file == file) {
			close_pdeo(pde, pdeo);
			break;
		}
	}
	spin_unlock(&pde->pde_unload_lock);
	return 0;
              return 0;
      }
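
/*
 * A minimal sketch of the producer side these wrappers protect
 * (hypothetical module code, not part of this file): the entry's
 * ->read lives in the module, so calls reach it only through
 * proc_reg_read() above, and remove_proc_entry() on module exit can
 * run the rundown/close machinery safely.
 *
 *	static ssize_t foo_read(struct file *f, char __user *buf,
 *				size_t len, loff_t *ppos)
 *	{
 *		return simple_read_from_buffer(buf, len, ppos, "foo\n", 4);
 *	}
 *
 *	static const struct file_operations foo_fops = {
 *		.owner	= THIS_MODULE,
 *		.read	= foo_read,
 *	};
 *
 *	proc_create("foo", 0444, NULL, &foo_fops);	// on init
 *	remove_proc_entry("foo", NULL);			// on exit
 */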
      
      static const struct file_operations proc_reg_file_ops = {
              .llseek                = proc_reg_llseek,
              .read                = proc_reg_read,
              .write                = proc_reg_write,
              .poll                = proc_reg_poll,
              .unlocked_ioctl        = proc_reg_unlocked_ioctl,
      #ifdef CONFIG_COMPAT
              .compat_ioctl        = proc_reg_compat_ioctl,
      #endif
              .mmap                = proc_reg_mmap,
              .get_unmapped_area = proc_reg_get_unmapped_area,
              .open                = proc_reg_open,
              .release        = proc_reg_release,
      };
      
      #ifdef CONFIG_COMPAT
      static const struct file_operations proc_reg_file_ops_no_compat = {
              .llseek                = proc_reg_llseek,
              .read                = proc_reg_read,
              .write                = proc_reg_write,
              .poll                = proc_reg_poll,
              .unlocked_ioctl        = proc_reg_unlocked_ioctl,
              .mmap                = proc_reg_mmap,
              .get_unmapped_area = proc_reg_get_unmapped_area,
              .open                = proc_reg_open,
              .release        = proc_reg_release,
      };
      #endif
      
      static const char *proc_follow_link(struct dentry *dentry, void **cookie)
      {
	struct proc_dir_entry *pde = PDE(d_inode(dentry));
	if (unlikely(!use_pde(pde)))
		return ERR_PTR(-EINVAL);
	*cookie = pde;
	return pde->data;
      }
      
      static void proc_put_link(struct inode *unused, void *p)
      {
	unuse_pde(p);
      }
      
      const struct inode_operations proc_link_inode_operations = {
              .readlink        = generic_readlink,
              .follow_link        = proc_follow_link,
              .put_link        = proc_put_link,
      };
      
      struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
      {
	struct inode *inode = new_inode_pseudo(sb);

	if (inode) {
		inode->i_ino = de->low_ino;
		inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
		PROC_I(inode)->pde = de;

		if (is_empty_pde(de)) {
			make_empty_dir_inode(inode);
			return inode;
		}
		if (de->mode) {
			inode->i_mode = de->mode;
			inode->i_uid = de->uid;
			inode->i_gid = de->gid;
		}
		if (de->size)
			inode->i_size = de->size;
		if (de->nlink)
			set_nlink(inode, de->nlink);
		WARN_ON(!de->proc_iops);
		inode->i_op = de->proc_iops;
		if (de->proc_fops) {
			if (S_ISREG(inode->i_mode)) {
#ifdef CONFIG_COMPAT
				if (!de->proc_fops->compat_ioctl)
					inode->i_fop =
						&proc_reg_file_ops_no_compat;
				else
#endif
					inode->i_fop = &proc_reg_file_ops;
			} else {
				inode->i_fop = de->proc_fops;
			}
                              }
                      }
              } else
                     pde_put(de);
              return inode;
      }
      
      int proc_fill_super(struct super_block *s)
      {
              struct inode *root_inode;
              int ret;
      
	s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC;
              s->s_blocksize = 1024;
              s->s_blocksize_bits = 10;
              s->s_magic = PROC_SUPER_MAGIC;
              s->s_op = &proc_sops;
              s->s_time_gran = 1;
              
              pde_get(&proc_root);
              root_inode = proc_get_inode(s, &proc_root);
              if (!root_inode) {
                      pr_err("proc_fill_super: get root inode failed\n");
                      return -ENOMEM;
              }
      
	s->s_root = d_make_root(root_inode);
              if (!s->s_root) {
                      pr_err("proc_fill_super: allocate dentry failed\n");
                      return -ENOMEM;
              }
      
	ret = proc_setup_self(s);
	if (ret) {
		return ret;
	}
	return proc_setup_thread_self(s);
      }
      /*
       * fs/inotify_user.c - inotify support for userspace
       *
       * Authors:
       *        John McCutchan        <ttb@tentacle.dhs.org>
       *        Robert Love        <rml@novell.com>
       *
       * Copyright (C) 2005 John McCutchan
       * Copyright 2006 Hewlett-Packard Development Company, L.P.
       *
       * Copyright (C) 2009 Eric Paris <Red Hat Inc>
 * inotify was largely rewritten to make use of the fsnotify infrastructure
       *
       * This program is free software; you can redistribute it and/or modify it
       * under the terms of the GNU General Public License as published by the
       * Free Software Foundation; either version 2, or (at your option) any
       * later version.
       *
       * This program is distributed in the hope that it will be useful, but
       * WITHOUT ANY WARRANTY; without even the implied warranty of
       * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
       * General Public License for more details.
       */
      
      #include <linux/file.h>
      #include <linux/fs.h> /* struct inode */
      #include <linux/fsnotify_backend.h>
      #include <linux/idr.h>
      #include <linux/init.h> /* fs_initcall */
      #include <linux/inotify.h>
      #include <linux/kernel.h> /* roundup() */
      #include <linux/namei.h> /* LOOKUP_FOLLOW */
      #include <linux/sched.h> /* struct user */
      #include <linux/slab.h> /* struct kmem_cache */
      #include <linux/syscalls.h>
      #include <linux/types.h>
      #include <linux/anon_inodes.h>
      #include <linux/uaccess.h>
      #include <linux/poll.h>
      #include <linux/wait.h>
      
      #include "inotify.h"
      #include "../fdinfo.h"
      
      #include <asm/ioctls.h>
      
      /* these are configurable via /proc/sys/fs/inotify/ */
      static int inotify_max_user_instances __read_mostly;
      static int inotify_max_queued_events __read_mostly;
      static int inotify_max_user_watches __read_mostly;
      
      static struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
      
      #ifdef CONFIG_SYSCTL
      
      #include <linux/sysctl.h>
      
      static int zero;
      
      struct ctl_table inotify_table[] = {
              {
                      .procname        = "max_user_instances",
                      .data                = &inotify_max_user_instances,
                      .maxlen                = sizeof(int),
                      .mode                = 0644,
                      .proc_handler        = proc_dointvec_minmax,
                      .extra1                = &zero,
              },
              {
                      .procname        = "max_user_watches",
                      .data                = &inotify_max_user_watches,
                      .maxlen                = sizeof(int),
                      .mode                = 0644,
                      .proc_handler        = proc_dointvec_minmax,
                      .extra1                = &zero,
              },
              {
                      .procname        = "max_queued_events",
                      .data                = &inotify_max_queued_events,
                      .maxlen                = sizeof(int),
                      .mode                = 0644,
                      .proc_handler        = proc_dointvec_minmax,
                      .extra1                = &zero
              },
              { }
      };
      #endif /* CONFIG_SYSCTL */
      
      static inline __u32 inotify_arg_to_mask(u32 arg)
      {
              __u32 mask;
      
	/*
	 * every watch implicitly accepts its own IN_IGNORED, cares about
	 * children, and receives an event when the inode is unmounted
	 */
	mask = (FS_IN_IGNORED | FS_EVENT_ON_CHILD | FS_UNMOUNT);

	/* keep only the event bits the caller may set */
	mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT | IN_EXCL_UNLINK));
      
              return mask;
      }
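
/*
 * For example, arg == (IN_MODIFY | IN_ONESHOT) yields
 * FS_IN_IGNORED | FS_EVENT_ON_CHILD | FS_UNMOUNT | IN_MODIFY |
 * IN_ONESHOT: the caller's event bits survive and the three implicit
 * bits are always set.
 */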
      
      static inline u32 inotify_mask_to_arg(__u32 mask)
      {
              return mask & (IN_ALL_EVENTS | IN_ISDIR | IN_UNMOUNT | IN_IGNORED |
                             IN_Q_OVERFLOW);
      }
      
/* inotify userspace file descriptor functions */
      static unsigned int inotify_poll(struct file *file, poll_table *wait)
      {
	struct fsnotify_group *group = file->private_data;
	int ret = 0;

	poll_wait(file, &group->notification_waitq, wait);
	mutex_lock(&group->notification_mutex);
	if (!fsnotify_notify_queue_is_empty(group))
		ret = POLLIN | POLLRDNORM;
	mutex_unlock(&group->notification_mutex);
      
              return ret;
      }
      
      static int round_event_name_len(struct fsnotify_event *fsn_event)
      {
              struct inotify_event_info *event;
      
              event = INOTIFY_E(fsn_event);
	if (!event->name_len)
		return 0;
	return roundup(event->name_len + 1, sizeof(struct inotify_event));
      }
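
/*
 * Worked example, assuming the common ABI where
 * sizeof(struct inotify_event) == 16: a 5-byte name rounds up as
 * roundup(5 + 1, 16) == 16, i.e. the name plus its terminating '\0'
 * is padded out to one full event-sized block.
 */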
      
/*
 * Get one queued event if there is one and it is small enough to
 * fit in "count". Return an error pointer if the buffer is not
 * large enough to hold it.
 *
 * Called with the group->notification_mutex held.
 */
      static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
                                                  size_t count)
      {
              size_t event_size = sizeof(struct inotify_event);
              struct fsnotify_event *event;
      
              if (fsnotify_notify_queue_is_empty(group))
                      return NULL;
      
	event = fsnotify_peek_first_event(group);

	pr_debug("%s: group=%p event=%p\n", __func__, group, event);

	event_size += round_event_name_len(event);
	if (event_size > count)
		return ERR_PTR(-EINVAL);

	/* held the notification_mutex the whole time, so this is the
	 * same event we peeked above */
	fsnotify_remove_first_event(group);
      
              return event;
      }
      
      /*
       * Copy an event to user space, returning how much we copied.
       *
       * We already checked that the event size is smaller than the
       * buffer we had in "get_one_event()" above.
       */
      static ssize_t copy_event_to_user(struct fsnotify_group *group,
                                        struct fsnotify_event *fsn_event,
                                        char __user *buf)
      {
              struct inotify_event inotify_event;
              struct inotify_event_info *event;
              size_t event_size = sizeof(struct inotify_event);
              size_t name_len;
              size_t pad_name_len;
      
              pr_debug("%s: group=%p event=%p\n", __func__, group, fsn_event);
      
              event = INOTIFY_E(fsn_event);
	name_len = event->name_len;
	/*
	 * round up the name length (plus a byte for the terminating '\0')
	 * to a multiple of event_size.
	 */
	pad_name_len = round_event_name_len(fsn_event);
	inotify_event.len = pad_name_len;
	inotify_event.mask = inotify_mask_to_arg(fsn_event->mask);
	inotify_event.wd = event->wd;
	inotify_event.cookie = event->sync_cookie;

	/* send the main event */
	if (copy_to_user(buf, &inotify_event, event_size))
		return -EFAULT;

	buf += event_size;

	/*
	 * fsnotify only stores the pathname, so here we have to send the pathname
	 * and then pad that pathname out to a multiple of sizeof(inotify_event)
	 * with zeros.
	 */
	if (pad_name_len) {
		/* copy the path name */
		if (copy_to_user(buf, event->name, name_len))
			return -EFAULT;
		buf += name_len;

		/* fill userspace with 0's */
		if (clear_user(buf, pad_name_len - name_len))
			return -EFAULT;
		event_size += pad_name_len;
	}

	return event_size;
      }
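
/*
 * The resulting userspace buffer layout for one event is:
 *
 *	| struct inotify_event hdr | name bytes | '\0' padding |
 *	   sizeof(inotify_event)     name_len     up to pad_name_len
 *
 * with hdr.len set to pad_name_len so a reader can step to the next
 * record.
 */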
      
      static ssize_t inotify_read(struct file *file, char __user *buf,
                                  size_t count, loff_t *pos)
      {
              struct fsnotify_group *group;
              struct fsnotify_event *kevent;
              char __user *start;
              int ret;
	DEFINE_WAIT_FUNC(wait, woken_wake_function);

	start = buf;
	group = file->private_data;

	add_wait_queue(&group->notification_waitq, &wait);
	while (1) {
		mutex_lock(&group->notification_mutex);
		kevent = get_one_event(group, count);
		mutex_unlock(&group->notification_mutex);

		pr_debug("%s: group=%p kevent=%p\n", __func__, group, kevent);

		if (kevent) {
			ret = PTR_ERR(kevent);
			if (IS_ERR(kevent))
				break;
			ret = copy_event_to_user(group, kevent, buf);
			fsnotify_destroy_event(group, kevent);
			if (ret < 0)
				break;
			buf += ret;
			count -= ret;
			continue;
		}

		ret = -EAGAIN;
		if (file->f_flags & O_NONBLOCK)
			break;
		ret = -ERESTARTSYS;
		if (signal_pending(current))
			break;

		if (start != buf)
			break;

		wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
	}
	remove_wait_queue(&group->notification_waitq, &wait);

	if (start != buf && ret != -EFAULT)
		ret = buf - start;
	return ret;
      }
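
/*
 * A sketch of the matching userspace consumer (hypothetical, for
 * illustration): read() returns a packed run of variable-length
 * records, so the walk must advance by sizeof(*ev) + ev->len.
 *
 *	char buf[4096];
 *	ssize_t n = read(inotify_fd, buf, sizeof(buf));
 *	for (char *p = buf; p < buf + n; ) {
 *		const struct inotify_event *ev = (const void *)p;
 *		// consume ev->wd, ev->mask, ev->len bytes of name
 *		p += sizeof(*ev) + ev->len;
 *	}
 */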
      
      static int inotify_release(struct inode *ignored, struct file *file)
      {
	struct fsnotify_group *group = file->private_data;
      
              pr_debug("%s: group=%p\n", __func__, group);
      
	/* free this group, matching get was inotify_init->fsnotify_alloc_group */
              fsnotify_destroy_group(group);
      
              return 0;
      }
      
      static long inotify_ioctl(struct file *file, unsigned int cmd,
                                unsigned long arg)
      {
              struct fsnotify_group *group;
              struct fsnotify_event *fsn_event;
              void __user *p;
              int ret = -ENOTTY;
              size_t send_len = 0;
      
	group = file->private_data;
	p = (void __user *) arg;

	pr_debug("%s: group=%p cmd=%u\n", __func__, group, cmd);

	switch (cmd) {
	case FIONREAD:
		mutex_lock(&group->notification_mutex);
		list_for_each_entry(fsn_event, &group->notification_list,
				    list) {
			send_len += sizeof(struct inotify_event);
			send_len += round_event_name_len(fsn_event);
		}
		mutex_unlock(&group->notification_mutex);
		ret = put_user(send_len, (int __user *) p);
		break;
	}

	return ret;
      }
      
      static const struct file_operations inotify_fops = {
              .show_fdinfo        = inotify_show_fdinfo,
              .poll                = inotify_poll,
              .read                = inotify_read,
              .fasync                = fsnotify_fasync,
              .release        = inotify_release,
              .unlocked_ioctl        = inotify_ioctl,
              .compat_ioctl        = inotify_ioctl,
              .llseek                = noop_llseek,
      };
      
      
      /*
 * inotify_find_inode - resolve a user-given path to a specific inode
       */
      static int inotify_find_inode(const char __user *dirname, struct path *path, unsigned flags)
      {
              int error;
      
              error = user_path_at(AT_FDCWD, dirname, flags, path);
	if (error)
		return error;
	/* you can only watch an inode if you have read permissions on it */
	error = inode_permission2(path->mnt, path->dentry->d_inode, MAY_READ);
              if (error)
                      path_put(path);
              return error;
      }
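
/*
 * idr_preload()/idr_preload_end() bracket the allocation below so the
 * GFP_NOWAIT idr_alloc_cyclic() done under the spinlock can fall back
 * on memory preallocated with GFP_KERNEL while sleeping was still
 * allowed.
 */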
      
      static int inotify_add_to_idr(struct idr *idr, spinlock_t *idr_lock,
                                    struct inotify_inode_mark *i_mark)
      {
              int ret;
      
	idr_preload(GFP_KERNEL);
	spin_lock(idr_lock);

	ret = idr_alloc_cyclic(idr, i_mark, 1, 0, GFP_NOWAIT);
	if (ret >= 0) {
		/* we added the mark to the idr, take a reference */
		i_mark->wd = ret;
		fsnotify_get_mark(&i_mark->fsn_mark);
	}

	spin_unlock(idr_lock);
	idr_preload_end();
              return ret < 0 ? ret : 0;
      }
      
      static struct inotify_inode_mark *inotify_idr_find_locked(struct fsnotify_group *group,
                                                                      int wd)
      {
	struct idr *idr = &group->inotify_data.idr;
	spinlock_t *idr_lock = &group->inotify_data.idr_lock;
	struct inotify_inode_mark *i_mark;

	assert_spin_locked(idr_lock);

	i_mark = idr_find(idr, wd);
	if (i_mark) {
		struct fsnotify_mark *fsn_mark = &i_mark->fsn_mark;

		fsnotify_get_mark(fsn_mark);
		/* One ref for being in the idr, one ref we just took */
		BUG_ON(atomic_read(&fsn_mark->refcnt) < 2);
	}

	return i_mark;
      }
      
      static struct inotify_inode_mark *inotify_idr_find(struct fsnotify_group *group,
                                                               int wd)
      {
              struct inotify_inode_mark *i_mark;
              spinlock_t *idr_lock = &group->inotify_data.idr_lock;
      
              spin_lock(idr_lock);
              i_mark = inotify_idr_find_locked(group, wd);
              spin_unlock(idr_lock);
      
              return i_mark;
      }
      
      static void do_inotify_remove_from_idr(struct fsnotify_group *group,
                                             struct inotify_inode_mark *i_mark)
      {
	struct idr *idr = &group->inotify_data.idr;
	spinlock_t *idr_lock = &group->inotify_data.idr_lock;
	int wd = i_mark->wd;

	assert_spin_locked(idr_lock);

	idr_remove(idr, wd);
      
              /* removed from the idr, drop that ref */
              fsnotify_put_mark(&i_mark->fsn_mark);
      }
      
      /*
       * Remove the mark from the idr (if present) and drop the reference
       * on the mark because it was in the idr.
       */
      static void inotify_remove_from_idr(struct fsnotify_group *group,
                                          struct inotify_inode_mark *i_mark)
      {
              spinlock_t *idr_lock = &group->inotify_data.idr_lock;
              struct inotify_inode_mark *found_i_mark = NULL;
              int wd;
      
	spin_lock(idr_lock);
              wd = i_mark->wd;
      
              /*
               * does this i_mark think it is in the idr?  we shouldn't get called
               * if it wasn't....
               */
              if (wd == -1) {
                      WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p"
                              " i_mark->inode=%p\n", __func__, i_mark, i_mark->wd,
                              i_mark->fsn_mark.group, i_mark->fsn_mark.inode);
                      goto out;
              }
      
              /* Lets look in the idr to see if we find it */
	found_i_mark = inotify_idr_find_locked(group, wd);
              if (unlikely(!found_i_mark)) {
                      WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p"
                              " i_mark->inode=%p\n", __func__, i_mark, i_mark->wd,
                              i_mark->fsn_mark.group, i_mark->fsn_mark.inode);
                      goto out;
              }
      
	/*
	 * We found a mark in the idr at the right wd, but it's
	 * not the mark we were told to remove.  eparis seriously
	 * fucked up somewhere.
	 */
	if (unlikely(found_i_mark != i_mark)) {
                      WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p "
                              "mark->inode=%p found_i_mark=%p found_i_mark->wd=%d "
                              "found_i_mark->group=%p found_i_mark->inode=%p\n",
                              __func__, i_mark, i_mark->wd, i_mark->fsn_mark.group,
                              i_mark->fsn_mark.inode, found_i_mark, found_i_mark->wd,
                              found_i_mark->fsn_mark.group,
                              found_i_mark->fsn_mark.inode);
                      goto out;
              }
      
              /*
               * One ref for being in the idr
               * one ref held by the caller trying to kill us
               * one ref grabbed by inotify_idr_find
               */
	if (unlikely(atomic_read(&i_mark->fsn_mark.refcnt) < 3)) {
		printk(KERN_ERR "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p"
			" i_mark->inode=%p\n", __func__, i_mark, i_mark->wd,
			i_mark->fsn_mark.group, i_mark->fsn_mark.inode);
		/* we can't really recover with bad ref counting.. */
		BUG();
	}
      
	do_inotify_remove_from_idr(group, i_mark);
      out:
              /* match the ref taken by inotify_idr_find_locked() */
              if (found_i_mark)
		fsnotify_put_mark(&found_i_mark->fsn_mark);
	i_mark->wd = -1;
              spin_unlock(idr_lock);
      }
      
      /*
       * Send IN_IGNORED for this wd, remove this wd from the idr.
       */
      void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
                                          struct fsnotify_group *group)
      {
              struct inotify_inode_mark *i_mark;
      
              /* Queue ignore event for the watch */
	inotify_handle_event(group, NULL, fsn_mark, NULL, FS_IN_IGNORED,
                                   NULL, FSNOTIFY_EVENT_NONE, NULL, 0);
      
              i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
              /* remove this mark from the idr */
              inotify_remove_from_idr(group, i_mark);
      
              atomic_dec(&group->inotify_data.user->inotify_watches);
      }
      
      /* ding dong the mark is dead */
      static void inotify_free_mark(struct fsnotify_mark *fsn_mark)
      {
              struct inotify_inode_mark *i_mark;
      
              i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
      
	kmem_cache_free(inotify_inode_mark_cachep, i_mark);
      }
      
      static int inotify_update_existing_watch(struct fsnotify_group *group,
                                               struct inode *inode,
                                               u32 arg)
      {
              struct fsnotify_mark *fsn_mark;
              struct inotify_inode_mark *i_mark;
              __u32 old_mask, new_mask;
              __u32 mask;
              int add = (arg & IN_MASK_ADD);
              int ret;
      
              mask = inotify_arg_to_mask(arg);
      
              fsn_mark = fsnotify_find_inode_mark(group, inode);
              if (!fsn_mark)
                      return -ENOENT;
      
              i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
      
	spin_lock(&fsn_mark->lock);

	old_mask = fsn_mark->mask;
	if (add)
		fsnotify_set_mark_mask_locked(fsn_mark, (fsn_mark->mask | mask));
	else
		fsnotify_set_mark_mask_locked(fsn_mark, mask);
	new_mask = fsn_mark->mask;

	spin_unlock(&fsn_mark->lock);

	if (old_mask != new_mask) {
		/* more bits in old than in new? */
		int dropped = (old_mask & ~new_mask);
		/* more bits in this fsn_mark than the inode's mask? */
		int do_inode = (new_mask & ~inode->i_fsnotify_mask);

		/* update the inode with this new fsn_mark */
		if (dropped || do_inode)
			fsnotify_recalc_inode_mask(inode);

	}

	/* return the wd */
	ret = i_mark->wd;
      
              /* match the get from fsnotify_find_mark() */
              fsnotify_put_mark(fsn_mark);
      
              return ret;
      }
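
/*
 * For example, if an existing mark watches IN_CREATE, a second
 * inotify_add_watch() with IN_DELETE | IN_MASK_ADD leaves it watching
 * IN_CREATE | IN_DELETE, while plain IN_DELETE would replace the mask
 * and drop the IN_CREATE bit.
 */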
      
      static int inotify_new_watch(struct fsnotify_group *group,
                                   struct inode *inode,
                                   u32 arg)
      {
              struct inotify_inode_mark *tmp_i_mark;
              __u32 mask;
              int ret;
              struct idr *idr = &group->inotify_data.idr;
              spinlock_t *idr_lock = &group->inotify_data.idr_lock;
      
              mask = inotify_arg_to_mask(arg);
      
	tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
	if (unlikely(!tmp_i_mark))
		return -ENOMEM;

	fsnotify_init_mark(&tmp_i_mark->fsn_mark, inotify_free_mark);
	tmp_i_mark->fsn_mark.mask = mask;
	tmp_i_mark->wd = -1;

	ret = -ENOSPC;
	if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches)
		goto out_err;

	ret = inotify_add_to_idr(idr, idr_lock, tmp_i_mark);
	if (ret)
		goto out_err;

	/* we are on the idr, now get on the inode */
	ret = fsnotify_add_mark_locked(&tmp_i_mark->fsn_mark, group, inode,
				       NULL, 0);
	if (ret) {
		/* we failed to get on the inode, get off the idr */
		inotify_remove_from_idr(group, tmp_i_mark);
		goto out_err;
	}

	/* increment the number of watches the user has */
	atomic_inc(&group->inotify_data.user->inotify_watches);

	/* return the watch descriptor for this new mark */
	ret = tmp_i_mark->wd;

out_err:
	/* match the ref from fsnotify_init_mark() */
	fsnotify_put_mark(&tmp_i_mark->fsn_mark);
      
              return ret;
      }
      
      static int inotify_update_watch(struct fsnotify_group *group, struct inode *inode, u32 arg)
      {
              int ret = 0;
      
              mutex_lock(&group->mark_mutex);
	/* try to update an existing watch with the new arg */
	ret = inotify_update_existing_watch(group, inode, arg);
	/* no mark present, try to add a new one */
	if (ret == -ENOENT)
		ret = inotify_new_watch(group, inode, arg);
	mutex_unlock(&group->mark_mutex);
      
              return ret;
      }
      
      static struct fsnotify_group *inotify_new_group(unsigned int max_events)
      {
              struct fsnotify_group *group;
              struct inotify_event_info *oevent;
      
              group = fsnotify_alloc_group(&inotify_fsnotify_ops);
              if (IS_ERR(group))
                      return group;
      
	oevent = kmalloc(sizeof(struct inotify_event_info), GFP_KERNEL);
	if (unlikely(!oevent)) {
		fsnotify_destroy_group(group);
		return ERR_PTR(-ENOMEM);
	}
	group->overflow_event = &oevent->fse;
              fsnotify_init_event(group->overflow_event, NULL, FS_Q_OVERFLOW);
              oevent->wd = -1;
              oevent->sync_cookie = 0;
              oevent->name_len = 0;
      
              group->max_events = max_events;
      
              spin_lock_init(&group->inotify_data.idr_lock);
              idr_init(&group->inotify_data.idr);
              group->inotify_data.user = get_current_user();
      
              if (atomic_inc_return(&group->inotify_data.user->inotify_devs) >
                  inotify_max_user_instances) {
                      fsnotify_destroy_group(group);
                      return ERR_PTR(-EMFILE);
              }
      
              return group;
      }
      
      
      /* inotify syscalls */
SYSCALL_DEFINE1(inotify_init1, int, flags)
      {
              struct fsnotify_group *group;
              int ret;
      
              /* Check the IN_* constants for consistency.  */
              BUILD_BUG_ON(IN_CLOEXEC != O_CLOEXEC);
              BUILD_BUG_ON(IN_NONBLOCK != O_NONBLOCK);
      
              if (flags & ~(IN_CLOEXEC | IN_NONBLOCK))
                      return -EINVAL;
      
	/*
	 * fsnotify_alloc_group took a reference to the group; we put it
	 * when the file is released in the end.
	 */
	group = inotify_new_group(inotify_max_queued_events);
	if (IS_ERR(group))
		return PTR_ERR(group);

	ret = anon_inode_getfd("inotify", &inotify_fops, group,
				  O_RDONLY | flags);
	if (ret < 0)
		fsnotify_destroy_group(group);

	return ret;
      }
      
      SYSCALL_DEFINE0(inotify_init)
      {
	return sys_inotify_init1(0);
      }
      
SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
                      u32, mask)
      {
              struct fsnotify_group *group;
              struct inode *inode;
              struct path path;
              struct path alteredpath;
              struct path *canonical_path = &path;
              struct fd f;
              int ret;
              unsigned flags = 0;
      
              /*
               * We share a lot of code with fs/dnotify.  We also share
               * the bit layout between inotify's IN_* and the fsnotify
               * FS_*.  This check ensures that only the inotify IN_*
               * bits get passed in and set in watches/events.
               */
              if (unlikely(mask & ~ALL_INOTIFY_BITS))
                      return -EINVAL;
              /*
               * Require at least one valid bit set in the mask.
               * Without _something_ set, we would have no events to
               * watch for.
               */
	if (unlikely(!(mask & ALL_INOTIFY_BITS)))
		return -EINVAL;

	f = fdget(fd);
	if (unlikely(!f.file))
		return -EBADF;

	/* verify that this is indeed an inotify instance */
	if (unlikely(f.file->f_op != &inotify_fops)) {
		ret = -EINVAL;
		goto fput_and_out;
	}

	if (!(mask & IN_DONT_FOLLOW))
		flags |= LOOKUP_FOLLOW;
	if (mask & IN_ONLYDIR)
		flags |= LOOKUP_DIRECTORY;

	ret = inotify_find_inode(pathname, &path, flags);
	if (ret)
		goto fput_and_out;

	/* support stacked filesystems */
	if (path.dentry && path.dentry->d_op) {
		if (path.dentry->d_op->d_canonical_path) {
			path.dentry->d_op->d_canonical_path(&path, &alteredpath);
			canonical_path = &alteredpath;
			path_put(&path);
		}
	}

	/* inode held in place by reference to path; group by fget on fd */
	inode = canonical_path->dentry->d_inode;
	group = f.file->private_data;

	/* create/update an inode mark */
	ret = inotify_update_watch(group, inode, mask);
	path_put(canonical_path);
fput_and_out:
	fdput(f);
              return ret;
      }
      
SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
      {
              struct fsnotify_group *group;
              struct inotify_inode_mark *i_mark;
              struct fd f;
              int ret = 0;
      
              f = fdget(fd);
              if (unlikely(!f.file))
                      return -EBADF;
      
              /* verify that this is indeed an inotify instance */
              ret = -EINVAL;
	if (unlikely(f.file->f_op != &inotify_fops))
		goto out;

	group = f.file->private_data;

	ret = -EINVAL;
	i_mark = inotify_idr_find(group, wd);
	if (unlikely(!i_mark))
		goto out;

	ret = 0;

	fsnotify_destroy_mark(&i_mark->fsn_mark, group);

	/* match ref taken by inotify_idr_find */
	fsnotify_put_mark(&i_mark->fsn_mark);

out:
	fdput(f);
              return ret;
      }
      
      /*
       * inotify_user_setup - Our initialization function.  Note that we cannot return
       * error because we have compiled-in VFS hooks.  So an (unlikely) failure here
       * must result in panic().
       */
      static int __init inotify_user_setup(void)
      {
              BUILD_BUG_ON(IN_ACCESS != FS_ACCESS);
              BUILD_BUG_ON(IN_MODIFY != FS_MODIFY);
              BUILD_BUG_ON(IN_ATTRIB != FS_ATTRIB);
              BUILD_BUG_ON(IN_CLOSE_WRITE != FS_CLOSE_WRITE);
              BUILD_BUG_ON(IN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
              BUILD_BUG_ON(IN_OPEN != FS_OPEN);
              BUILD_BUG_ON(IN_MOVED_FROM != FS_MOVED_FROM);
              BUILD_BUG_ON(IN_MOVED_TO != FS_MOVED_TO);
              BUILD_BUG_ON(IN_CREATE != FS_CREATE);
              BUILD_BUG_ON(IN_DELETE != FS_DELETE);
              BUILD_BUG_ON(IN_DELETE_SELF != FS_DELETE_SELF);
              BUILD_BUG_ON(IN_MOVE_SELF != FS_MOVE_SELF);
              BUILD_BUG_ON(IN_UNMOUNT != FS_UNMOUNT);
              BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW);
              BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED);
              BUILD_BUG_ON(IN_EXCL_UNLINK != FS_EXCL_UNLINK);
              BUILD_BUG_ON(IN_ISDIR != FS_ISDIR);
              BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT);
      
              BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21);
      
              inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC);
      
              inotify_max_queued_events = 16384;
              inotify_max_user_instances = 128;
              inotify_max_user_watches = 8192;
      
              return 0;
      }
      fs_initcall(inotify_user_setup);
      /*
       *        linux/mm/mincore.c
       *
       * Copyright (C) 1994-2006  Linus Torvalds
       */
      
      /*
       * The mincore() system call.
       */
      #include <linux/pagemap.h>
      #include <linux/gfp.h>
      #include <linux/mm.h>
      #include <linux/mman.h>
      #include <linux/syscalls.h>
      #include <linux/swap.h>
      #include <linux/swapops.h>
      #include <linux/hugetlb.h>
      
      #include <asm/uaccess.h>
      #include <asm/pgtable.h>
      
      static int mincore_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr,
                              unsigned long end, struct mm_walk *walk)
      {
      #ifdef CONFIG_HUGETLB_PAGE
              unsigned char present;
              unsigned char *vec = walk->private;
      
	/*
	 * Hugepages under a user process are always in RAM and never
	 * swapped out, but theoretically they still need to be checked.
	 */
              present = pte && !huge_pte_none(huge_ptep_get(pte));
              for (; addr != end; vec++, addr += PAGE_SIZE)
                      *vec = present;
              walk->private = vec;
      #else
              BUG();
      #endif
              return 0;
      }
      
      /*
       * Later we can get more picky about what "in core" means precisely.
       * For now, simply check to see if the page is in the page cache,
       * and is up to date; i.e. that no page-in operation would be required
       * at this time if an application were to map and access this page.
       */
      static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
      {
              unsigned char present = 0;
              struct page *page;
      
              /*
               * When tmpfs swaps out a page from a file, any process mapping that
               * file will not get a swp_entry_t in its pte, but rather it is like
               * any other file mapping (ie. marked !present and faulted in with
               * tmpfs's .fault). So swapped out tmpfs mappings are tested here.
               */
      #ifdef CONFIG_SWAP
	if (shmem_mapping(mapping)) {
		page = find_get_entry(mapping, pgoff);
		/*
		 * shmem/tmpfs may return swap: account for swapcache
		 * page too.
		 */
		if (radix_tree_exceptional_entry(page)) {
			swp_entry_t swp = radix_to_swp_entry(page);
			page = find_get_page(swap_address_space(swp), swp.val);
		}
	} else
		page = find_get_page(mapping, pgoff);
#else
	page = find_get_page(mapping, pgoff);
#endif
	if (page) {
		present = PageUptodate(page);
		page_cache_release(page);
	}

	return present;
      }
      
      static int __mincore_unmapped_range(unsigned long addr, unsigned long end,
                                      struct vm_area_struct *vma, unsigned char *vec)
      {
	unsigned long nr = (end - addr) >> PAGE_SHIFT;
	int i;

	if (vma->vm_file) {
		pgoff_t pgoff;

		pgoff = linear_page_index(vma, addr);
		for (i = 0; i < nr; i++, pgoff++)
			vec[i] = mincore_page(vma->vm_file->f_mapping, pgoff);
	} else {
		for (i = 0; i < nr; i++)
			vec[i] = 0;
	}
	return nr;
      }
      
      static int mincore_unmapped_range(unsigned long addr, unsigned long end,
                                         struct mm_walk *walk)
      {
	walk->private += __mincore_unmapped_range(addr, end,
						  walk->vma, walk->private);
              return 0;
      }
      
      static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
                              struct mm_walk *walk)
      {
              spinlock_t *ptl;
	struct vm_area_struct *vma = walk->vma;
              pte_t *ptep;
              unsigned char *vec = walk->private;
              int nr = (end - addr) >> PAGE_SHIFT;
      
              if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
                      memset(vec, 1, nr);
                      spin_unlock(ptl);
                      goto out;
              }
      
              if (pmd_trans_unstable(pmd)) {
                      __mincore_unmapped_range(addr, end, vma, vec);
                      goto out;
              }
      
	ptep = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
	for (; addr != end; ptep++, addr += PAGE_SIZE) {
		pte_t pte = *ptep;

		if (pte_none(pte))
			__mincore_unmapped_range(addr, addr + PAGE_SIZE,
						 vma, vec);
		else if (pte_present(pte))
			*vec = 1;
                      else { /* pte is a swap entry */
                              swp_entry_t entry = pte_to_swp_entry(pte);
      
                              if (non_swap_entry(entry)) {
                                      /*
                                       * migration or hwpoison entries are always
                                       * uptodate
                                       */
                                      *vec = 1;
                              } else {
      #ifdef CONFIG_SWAP
                                      *vec = mincore_page(swap_address_space(entry),
                                              entry.val);
      #else
                                      WARN_ON(1);
                                      *vec = 1;
      #endif
                              }
                      }
                       vec++;
               }
               pte_unmap_unlock(ptep - 1, ptl);
      out:
              walk->private += nr;
              cond_resched();
              return 0;
      }
      
      static inline bool can_do_mincore(struct vm_area_struct *vma)
      {
              if (vma_is_anonymous(vma))
                      return true;
              if (!vma->vm_file)
                      return false;
              /*
               * Reveal pagecache information only for non-anonymous mappings that
               * correspond to the files the calling process could (if tried) open
               * for writing; otherwise we'd be including shared non-exclusive
               * mappings, which opens a side channel.
               */
              return inode_owner_or_capable(file_inode(vma->vm_file)) ||
                      inode_permission(file_inode(vma->vm_file), MAY_WRITE) == 0;
      }
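
       /*
        * Background note: mincore() used to report page-cache residency for
        * any readable mapping, which let an attacker observe other
        * processes' file accesses via residency timing (the side channel
        * tracked as CVE-2019-5489).  can_do_mincore() above is the
        * conservative fix: only owners and would-be writers learn real
        * residency information.
        */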
      
       /*
        * Do a chunk of "sys_mincore()". We've already checked
        * all the arguments, we hold the mmap semaphore: we should
        * just return the amount of info we're asked for.
        */
      static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *vec)
      {
              struct vm_area_struct *vma;
              unsigned long end;
               int err;
              struct mm_walk mincore_walk = {
                      .pmd_entry = mincore_pte_range,
                      .pte_hole = mincore_unmapped_range,
                      .hugetlb_entry = mincore_hugetlb,
                      .private = vec,
              };
      
              vma = find_vma(current->mm, addr);
              if (!vma || addr < vma->vm_start)
                      return -ENOMEM;
              end = min(vma->vm_end, addr + (pages << PAGE_SHIFT));
               if (!can_do_mincore(vma)) {
                       unsigned long pages = DIV_ROUND_UP(end - addr, PAGE_SIZE);
                       /*
                        * Do not reveal pagecache state for mappings the
                        * caller could not open for writing; report the pages
                        * as resident rather than failing (see
                        * can_do_mincore() above).
                        */
                       memset(vec, 1, pages);
                       return pages;
               }
              mincore_walk.mm = vma->vm_mm;
              err = walk_page_range(addr, end, &mincore_walk);
              if (err < 0)
                      return err;
              return (end - addr) >> PAGE_SHIFT;
      }
      
       /*
        * The mincore(2) system call.
        *
        * mincore() returns the memory residency status of the pages in the
        * current process's address space specified by [addr, addr + len).
        * The status is returned in a vector of bytes.  The least significant
        * bit of each byte is 1 if the referenced page is in memory, otherwise
        * it is zero.
        *
        * Because the status of a page can change after mincore() checks it
        * but before it returns to the application, the returned vector may
        * contain stale information.  Only locked pages are guaranteed to
        * remain in memory.
        *
        * return values:
        *  zero    - success
        *  -EFAULT - vec points to an illegal address
        *  -EINVAL - addr is not a multiple of PAGE_CACHE_SIZE
        *  -ENOMEM - Addresses in the range [addr, addr + len] are
        *                invalid for the address space of this process, or
        *                specify one or more pages which are not currently
        *                mapped
        *  -EAGAIN - A kernel resource was temporarily unavailable.
        */
       SYSCALL_DEFINE3(mincore, unsigned long, start, size_t, len,
                       unsigned char __user *, vec)
       {
               long retval;
               unsigned long pages;
               unsigned char *tmp;

               /* Check the start address: needs to be page-aligned.. */
               if (start & ~PAGE_CACHE_MASK)
                       return -EINVAL;

               /* ..and we need to be passed a valid user-space range */
               if (!access_ok(VERIFY_READ, (void __user *) start, len))
                       return -ENOMEM;

               /* This also avoids any overflows on PAGE_CACHE_ALIGN */
               pages = len >> PAGE_SHIFT;
               pages += (offset_in_page(len)) != 0;

               if (!access_ok(VERIFY_WRITE, vec, pages))
                       return -EFAULT;

               tmp = (void *) __get_free_page(GFP_USER);
               if (!tmp)
                       return -EAGAIN;

               retval = 0;
              while (pages) {
                      /*
                       * Do at most PAGE_SIZE entries per iteration, due to
                       * the temporary buffer size.
                       */
                      down_read(&current->mm->mmap_sem);
                      retval = do_mincore(start, min(pages, PAGE_SIZE), tmp);
                      up_read(&current->mm->mmap_sem);
      
                      if (retval <= 0)
                              break;
                      if (copy_to_user(vec, tmp, retval)) {
                              retval = -EFAULT;
                              break;
                      }
                      pages -= retval;
                      vec += retval;
                      start += retval << PAGE_SHIFT;
                      retval = 0;
              }
              free_page((unsigned long) tmp);
              return retval;
      }
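
       /*
        * Illustrative userspace usage (not part of this file): a minimal
        * sketch of calling mincore(2) on an anonymous mapping, kept under
        * "#if 0" since this translation unit is kernel code.  The example
        * assumes a hosted libc; error handling is omitted for brevity.
        */
       #if 0
       #include <stdio.h>
       #include <stdlib.h>
       #include <sys/mman.h>
       #include <unistd.h>

       int main(void)
       {
               long page = sysconf(_SC_PAGESIZE);
               size_t len = 16 * page;
               unsigned char *vec = malloc(len / page);
               char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
                                MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
               size_t i;

               buf[0] = 1;        /* fault in the first page only */
               if (mincore(buf, len, vec) == 0) {
                       for (i = 0; i < len / page; i++)
                               printf("page %zu: %s\n", i,
                                      (vec[i] & 1) ? "resident" : "not resident");
               }
               munmap(buf, len);
               free(vec);
               return 0;
       }
       #endif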
      
      #include <linux/linkage.h>
      #include <linux/errno.h>
      
      #include <asm/unistd.h>
      
      /*  we can't #include <linux/syscalls.h> here,
          but tell gcc to not warn with -Wmissing-prototypes  */
      asmlinkage long sys_ni_syscall(void);
      
      /*
       * Non-implemented system calls get redirected here.
       */
      asmlinkage long sys_ni_syscall(void)
      {
               return -ENOSYS;
      }
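
       /*
        * Each cond_syscall() below emits a weak alias so that, when the
        * corresponding subsystem is configured out, the syscall table slot
        * still links and resolves to sys_ni_syscall.  As a rough sketch
        * (the real macro lives in <linux/syscalls.h> and its exact form
        * varies by architecture and kernel version), the effect is
        * equivalent to:
        *
        *        asm(".weak sys_quotactl\n\t"
        *            ".set  sys_quotactl, sys_ni_syscall");
        *
        * A strong definition from the subsystem (here fs/quota/) overrides
        * the weak alias when that code is built in.
        */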
      
      cond_syscall(sys_quotactl);
      cond_syscall(sys32_quotactl);
      cond_syscall(sys_acct);
      cond_syscall(sys_lookup_dcookie);
      cond_syscall(compat_sys_lookup_dcookie);
      cond_syscall(sys_swapon);
      cond_syscall(sys_swapoff);
      cond_syscall(sys_kexec_load);
      cond_syscall(compat_sys_kexec_load);
      cond_syscall(sys_kexec_file_load);
      cond_syscall(sys_init_module);
      cond_syscall(sys_finit_module);
      cond_syscall(sys_delete_module);
      cond_syscall(sys_socketpair);
      cond_syscall(sys_bind);
      cond_syscall(sys_listen);
      cond_syscall(sys_accept);
      cond_syscall(sys_accept4);
      cond_syscall(sys_connect);
      cond_syscall(sys_getsockname);
      cond_syscall(sys_getpeername);
      cond_syscall(sys_sendto);
      cond_syscall(sys_send);
      cond_syscall(sys_recvfrom);
      cond_syscall(sys_recv);
      cond_syscall(sys_socket);
      cond_syscall(sys_setsockopt);
      cond_syscall(compat_sys_setsockopt);
      cond_syscall(sys_getsockopt);
      cond_syscall(compat_sys_getsockopt);
      cond_syscall(sys_shutdown);
      cond_syscall(sys_sendmsg);
      cond_syscall(sys_sendmmsg);
      cond_syscall(compat_sys_sendmsg);
      cond_syscall(compat_sys_sendmmsg);
      cond_syscall(sys_recvmsg);
      cond_syscall(sys_recvmmsg);
      cond_syscall(compat_sys_recvmsg);
      cond_syscall(compat_sys_recv);
      cond_syscall(compat_sys_recvfrom);
      cond_syscall(compat_sys_recvmmsg);
      cond_syscall(sys_socketcall);
      cond_syscall(sys_futex);
      cond_syscall(compat_sys_futex);
      cond_syscall(sys_set_robust_list);
      cond_syscall(compat_sys_set_robust_list);
      cond_syscall(sys_get_robust_list);
      cond_syscall(compat_sys_get_robust_list);
      cond_syscall(sys_epoll_create);
      cond_syscall(sys_epoll_create1);
      cond_syscall(sys_epoll_ctl);
      cond_syscall(sys_epoll_wait);
      cond_syscall(sys_epoll_pwait);
      cond_syscall(compat_sys_epoll_pwait);
      cond_syscall(sys_semget);
      cond_syscall(sys_semop);
      cond_syscall(sys_semtimedop);
      cond_syscall(compat_sys_semtimedop);
      cond_syscall(sys_semctl);
      cond_syscall(compat_sys_semctl);
      cond_syscall(sys_msgget);
      cond_syscall(sys_msgsnd);
      cond_syscall(compat_sys_msgsnd);
      cond_syscall(sys_msgrcv);
      cond_syscall(compat_sys_msgrcv);
      cond_syscall(sys_msgctl);
      cond_syscall(compat_sys_msgctl);
      cond_syscall(sys_shmget);
      cond_syscall(sys_shmat);
      cond_syscall(compat_sys_shmat);
      cond_syscall(sys_shmdt);
      cond_syscall(sys_shmctl);
      cond_syscall(compat_sys_shmctl);
      cond_syscall(sys_mq_open);
      cond_syscall(sys_mq_unlink);
      cond_syscall(sys_mq_timedsend);
      cond_syscall(sys_mq_timedreceive);
      cond_syscall(sys_mq_notify);
      cond_syscall(sys_mq_getsetattr);
      cond_syscall(compat_sys_mq_open);
      cond_syscall(compat_sys_mq_timedsend);
      cond_syscall(compat_sys_mq_timedreceive);
      cond_syscall(compat_sys_mq_notify);
      cond_syscall(compat_sys_mq_getsetattr);
      cond_syscall(sys_mbind);
      cond_syscall(sys_get_mempolicy);
      cond_syscall(sys_set_mempolicy);
      cond_syscall(compat_sys_mbind);
      cond_syscall(compat_sys_get_mempolicy);
      cond_syscall(compat_sys_set_mempolicy);
      cond_syscall(sys_add_key);
      cond_syscall(sys_request_key);
      cond_syscall(sys_keyctl);
      cond_syscall(compat_sys_keyctl);
      cond_syscall(compat_sys_socketcall);
      cond_syscall(sys_inotify_init);
      cond_syscall(sys_inotify_init1);
      cond_syscall(sys_inotify_add_watch);
      cond_syscall(sys_inotify_rm_watch);
      cond_syscall(sys_migrate_pages);
      cond_syscall(sys_move_pages);
      cond_syscall(sys_chown16);
      cond_syscall(sys_fchown16);
      cond_syscall(sys_getegid16);
      cond_syscall(sys_geteuid16);
      cond_syscall(sys_getgid16);
      cond_syscall(sys_getgroups16);
      cond_syscall(sys_getresgid16);
      cond_syscall(sys_getresuid16);
      cond_syscall(sys_getuid16);
      cond_syscall(sys_lchown16);
      cond_syscall(sys_setfsgid16);
      cond_syscall(sys_setfsuid16);
      cond_syscall(sys_setgid16);
      cond_syscall(sys_setgroups16);
      cond_syscall(sys_setregid16);
      cond_syscall(sys_setresgid16);
      cond_syscall(sys_setresuid16);
      cond_syscall(sys_setreuid16);
      cond_syscall(sys_setuid16);
      cond_syscall(sys_sgetmask);
      cond_syscall(sys_ssetmask);
      cond_syscall(sys_vm86old);
      cond_syscall(sys_vm86);
      cond_syscall(sys_modify_ldt);
      cond_syscall(sys_ipc);
      cond_syscall(compat_sys_ipc);
      cond_syscall(compat_sys_sysctl);
      cond_syscall(sys_flock);
      cond_syscall(sys_io_setup);
      cond_syscall(sys_io_destroy);
      cond_syscall(sys_io_submit);
      cond_syscall(sys_io_cancel);
      cond_syscall(sys_io_getevents);
      cond_syscall(sys_sysfs);
      cond_syscall(sys_syslog);
      cond_syscall(sys_process_vm_readv);
      cond_syscall(sys_process_vm_writev);
      cond_syscall(compat_sys_process_vm_readv);
      cond_syscall(compat_sys_process_vm_writev);
      cond_syscall(sys_uselib);
      cond_syscall(sys_fadvise64);
      cond_syscall(sys_fadvise64_64);
      cond_syscall(sys_madvise);
      cond_syscall(sys_setuid);
      cond_syscall(sys_setregid);
      cond_syscall(sys_setgid);
      cond_syscall(sys_setreuid);
      cond_syscall(sys_setresuid);
      cond_syscall(sys_getresuid);
      cond_syscall(sys_setresgid);
      cond_syscall(sys_getresgid);
      cond_syscall(sys_setgroups);
      cond_syscall(sys_getgroups);
      cond_syscall(sys_setfsuid);
      cond_syscall(sys_setfsgid);
      cond_syscall(sys_capget);
      cond_syscall(sys_capset);
      
      /* arch-specific weak syscall entries */
      cond_syscall(sys_pciconfig_read);
      cond_syscall(sys_pciconfig_write);
      cond_syscall(sys_pciconfig_iobase);
      cond_syscall(compat_sys_s390_ipc);
      cond_syscall(ppc_rtas);
      cond_syscall(sys_spu_run);
      cond_syscall(sys_spu_create);
      cond_syscall(sys_subpage_prot);
      cond_syscall(sys_s390_pci_mmio_read);
      cond_syscall(sys_s390_pci_mmio_write);
      
      /* mmu depending weak syscall entries */
      cond_syscall(sys_mprotect);
      cond_syscall(sys_msync);
      cond_syscall(sys_mlock);
      cond_syscall(sys_munlock);
      cond_syscall(sys_mlockall);
      cond_syscall(sys_munlockall);
      cond_syscall(sys_mlock2);
      cond_syscall(sys_mincore);
      cond_syscall(sys_madvise);
      cond_syscall(sys_mremap);
      cond_syscall(sys_remap_file_pages);
      cond_syscall(compat_sys_move_pages);
      cond_syscall(compat_sys_migrate_pages);
      
      /* block-layer dependent */
      cond_syscall(sys_bdflush);
      cond_syscall(sys_ioprio_set);
      cond_syscall(sys_ioprio_get);
      
      /* New file descriptors */
      cond_syscall(sys_signalfd);
      cond_syscall(sys_signalfd4);
      cond_syscall(compat_sys_signalfd);
      cond_syscall(compat_sys_signalfd4);
      cond_syscall(sys_timerfd_create);
      cond_syscall(sys_timerfd_settime);
      cond_syscall(sys_timerfd_gettime);
      cond_syscall(compat_sys_timerfd_settime);
      cond_syscall(compat_sys_timerfd_gettime);
      cond_syscall(sys_eventfd);
      cond_syscall(sys_eventfd2);
      cond_syscall(sys_memfd_create);
      cond_syscall(sys_userfaultfd);
      
      /* performance counters: */
      cond_syscall(sys_perf_event_open);
      
      /* fanotify! */
      cond_syscall(sys_fanotify_init);
      cond_syscall(sys_fanotify_mark);
      cond_syscall(compat_sys_fanotify_mark);
      
      /* open by handle */
      cond_syscall(sys_name_to_handle_at);
      cond_syscall(sys_open_by_handle_at);
      cond_syscall(compat_sys_open_by_handle_at);
      
      /* compare kernel pointers */
      cond_syscall(sys_kcmp);
      
      /* operate on Secure Computing state */
      cond_syscall(sys_seccomp);
      
      /* access BPF programs and maps */
      cond_syscall(sys_bpf);
      
      /* execveat */
      cond_syscall(sys_execveat);
      
      /* membarrier */
      cond_syscall(sys_membarrier);
      /* -*- linux-c -*- --------------------------------------------------------- *
       *
       * linux/fs/devpts/inode.c
       *
       *  Copyright 1998-2004 H. Peter Anvin -- All Rights Reserved
       *
       * This file is part of the Linux kernel and is made available under
       * the terms of the GNU General Public License, version 2, or at your
       * option, any later version, incorporated herein by reference.
       *
       * ------------------------------------------------------------------------- */
      
      #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
      
      #include <linux/module.h>
      #include <linux/init.h>
      #include <linux/fs.h>
      #include <linux/sched.h>
      #include <linux/namei.h>
      #include <linux/slab.h>
      #include <linux/mount.h>
      #include <linux/tty.h>
      #include <linux/mutex.h>
      #include <linux/magic.h>
      #include <linux/idr.h>
      #include <linux/devpts_fs.h>
      #include <linux/parser.h>
      #include <linux/fsnotify.h>
      #include <linux/seq_file.h>
      
      #define DEVPTS_DEFAULT_MODE 0600
      /*
       * ptmx is a new node in /dev/pts and will be unused in legacy (single-
       * instance) mode. To prevent surprises in user space, set permissions of
       * ptmx to 0. Use 'chmod' or remount with '-o ptmxmode' to set meaningful
       * permissions.
       */
      #define DEVPTS_DEFAULT_PTMX_MODE 0000
      #define PTMX_MINOR        2
      
      /*
       * sysctl support for setting limits on the number of Unix98 ptys allocated.
       * Otherwise one can eat up all kernel memory by opening /dev/ptmx repeatedly.
       */
      static int pty_limit = NR_UNIX98_PTY_DEFAULT;
      static int pty_reserve = NR_UNIX98_PTY_RESERVE;
      static int pty_limit_min;
      static int pty_limit_max = INT_MAX;
      static int pty_count;
      
      static struct ctl_table pty_table[] = {
              {
                      .procname        = "max",
                      .maxlen                = sizeof(int),
                      .mode                = 0644,
                      .data                = &pty_limit,
                      .proc_handler        = proc_dointvec_minmax,
                      .extra1                = &pty_limit_min,
                      .extra2                = &pty_limit_max,
              }, {
                      .procname        = "reserve",
                      .maxlen                = sizeof(int),
                      .mode                = 0644,
                      .data                = &pty_reserve,
                      .proc_handler        = proc_dointvec_minmax,
                      .extra1                = &pty_limit_min,
                      .extra2                = &pty_limit_max,
              }, {
                      .procname        = "nr",
                      .maxlen                = sizeof(int),
                      .mode                = 0444,
                      .data                = &pty_count,
                      .proc_handler        = proc_dointvec,
              },
              {}
      };
      
      static struct ctl_table pty_kern_table[] = {
              {
                      .procname        = "pty",
                      .mode                = 0555,
                      .child                = pty_table,
              },
              {}
      };
      
      static struct ctl_table pty_root_table[] = {
              {
                      .procname        = "kernel",
                      .mode                = 0555,
                      .child                = pty_kern_table,
              },
              {}
      };
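
       /*
        * The tables above nest as kernel -> pty -> {max, reserve, nr}, so
        * once init_devpts_fs() registers pty_root_table the knobs show up
        * as /proc/sys/kernel/pty/max, /proc/sys/kernel/pty/reserve and the
        * read-only /proc/sys/kernel/pty/nr, e.g. (illustrative session):
        *
        *        # cat /proc/sys/kernel/pty/nr
        *        12
        */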
      
      static DEFINE_MUTEX(allocated_ptys_lock);
      
      static struct vfsmount *devpts_mnt;
      
      struct pts_mount_opts {
              int setuid;
              int setgid;
              kuid_t   uid;
              kgid_t   gid;
              umode_t mode;
              umode_t ptmxmode;
              int newinstance;
              int max;
      };
      
      enum {
              Opt_uid, Opt_gid, Opt_mode, Opt_ptmxmode, Opt_newinstance,  Opt_max,
              Opt_err
      };
      
      static const match_table_t tokens = {
              {Opt_uid, "uid=%u"},
              {Opt_gid, "gid=%u"},
              {Opt_mode, "mode=%o"},
      #ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
              {Opt_ptmxmode, "ptmxmode=%o"},
              {Opt_newinstance, "newinstance"},
              {Opt_max, "max=%d"},
      #endif
              {Opt_err, NULL}
      };
      
      struct pts_fs_info {
              struct ida allocated_ptys;
              struct pts_mount_opts mount_opts;
              struct super_block *sb;
              struct dentry *ptmx_dentry;
      };
      
      static inline struct pts_fs_info *DEVPTS_SB(struct super_block *sb)
      {
               return sb->s_fs_info;
      }
      
      static inline struct super_block *pts_sb_from_inode(struct inode *inode)
      {
      #ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
              if (inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC)
                      return inode->i_sb;
      #endif
               if (!devpts_mnt)
                       return NULL;
               return devpts_mnt->mnt_sb;
      }
      
      #define PARSE_MOUNT        0
      #define PARSE_REMOUNT        1
      
      /*
       * parse_mount_options():
       *        Set @opts to mount options specified in @data. If an option is not
       *        specified in @data, set it to its default value. The exception is
       *        'newinstance' option which can only be set/cleared on a mount (i.e.
       *        cannot be changed during remount).
       *
       * Note: @data may be NULL (in which case all options are set to default).
       */
      static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts)
      {
              char *p;
              kuid_t uid;
              kgid_t gid;
      
              opts->setuid  = 0;
              opts->setgid  = 0;
              opts->uid     = GLOBAL_ROOT_UID;
              opts->gid     = GLOBAL_ROOT_GID;
              opts->mode    = DEVPTS_DEFAULT_MODE;
              opts->ptmxmode = DEVPTS_DEFAULT_PTMX_MODE;
              opts->max     = NR_UNIX98_PTY_MAX;
      
              /* newinstance makes sense only on initial mount */
              if (op == PARSE_MOUNT)
                      opts->newinstance = 0;
      
              while ((p = strsep(&data, ",")) != NULL) {
                      substring_t args[MAX_OPT_ARGS];
                      int token;
                      int option;
      
                      if (!*p)
                              continue;
      
                      token = match_token(p, tokens, args);
                      switch (token) {
                      case Opt_uid:
                              if (match_int(&args[0], &option))
                                      return -EINVAL;
                              uid = make_kuid(current_user_ns(), option);
                              if (!uid_valid(uid))
                                      return -EINVAL;
                              opts->uid = uid;
                              opts->setuid = 1;
                              break;
                      case Opt_gid:
                              if (match_int(&args[0], &option))
                                      return -EINVAL;
                              gid = make_kgid(current_user_ns(), option);
                              if (!gid_valid(gid))
                                      return -EINVAL;
                              opts->gid = gid;
                              opts->setgid = 1;
                              break;
                      case Opt_mode:
                              if (match_octal(&args[0], &option))
                                      return -EINVAL;
                              opts->mode = option & S_IALLUGO;
                              break;
      #ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
                      case Opt_ptmxmode:
                              if (match_octal(&args[0], &option))
                                      return -EINVAL;
                              opts->ptmxmode = option & S_IALLUGO;
                              break;
                      case Opt_newinstance:
                              /* newinstance makes sense only on initial mount */
                              if (op == PARSE_MOUNT)
                                      opts->newinstance = 1;
                              break;
                      case Opt_max:
                              if (match_int(&args[0], &option) ||
                                  option < 0 || option > NR_UNIX98_PTY_MAX)
                                      return -EINVAL;
                              opts->max = option;
                              break;
      #endif
                      default:
                              pr_err("called with bogus options\n");
                              return -EINVAL;
                      }
              }
      
              return 0;
      }
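
       /*
        * For illustration, a typical @data string as assembled by mount(8)
        * might be "uid=1000,gid=5,mode=620,ptmxmode=666,newinstance,max=1024";
        * each comma-separated token is matched against tokens[] above.  The
        * numeric values here are made up for the example.
        */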
      
      #ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
      static int mknod_ptmx(struct super_block *sb)
      {
              int mode;
              int rc = -ENOMEM;
              struct dentry *dentry;
              struct inode *inode;
              struct dentry *root = sb->s_root;
              struct pts_fs_info *fsi = DEVPTS_SB(sb);
              struct pts_mount_opts *opts = &fsi->mount_opts;
              kuid_t root_uid;
              kgid_t root_gid;
      
              root_uid = make_kuid(current_user_ns(), 0);
              root_gid = make_kgid(current_user_ns(), 0);
              if (!uid_valid(root_uid) || !gid_valid(root_gid))
                      return -EINVAL;
      
              mutex_lock(&d_inode(root)->i_mutex);
      
              /* If we have already created ptmx node, return */
              if (fsi->ptmx_dentry) {
                      rc = 0;
                      goto out;
              }
      
              dentry = d_alloc_name(root, "ptmx");
              if (!dentry) {
                      pr_err("Unable to alloc dentry for ptmx node\n");
                      goto out;
              }
      
              /*
               * Create a new 'ptmx' node in this mount of devpts.
               */
              inode = new_inode(sb);
              if (!inode) {
                      pr_err("Unable to alloc inode for ptmx node\n");
                      dput(dentry);
                      goto out;
              }
      
              inode->i_ino = 2;
              inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
      
              mode = S_IFCHR|opts->ptmxmode;
              init_special_inode(inode, mode, MKDEV(TTYAUX_MAJOR, 2));
              inode->i_uid = root_uid;
              inode->i_gid = root_gid;
      
              d_add(dentry, inode);
      
              fsi->ptmx_dentry = dentry;
              rc = 0;
      out:
              mutex_unlock(&d_inode(root)->i_mutex);
              return rc;
      }
      
      static void update_ptmx_mode(struct pts_fs_info *fsi)
      {
              struct inode *inode;
              if (fsi->ptmx_dentry) {
                      inode = d_inode(fsi->ptmx_dentry);
                      inode->i_mode = S_IFCHR|fsi->mount_opts.ptmxmode;
              }
      }
      #else
      static inline void update_ptmx_mode(struct pts_fs_info *fsi)
      {
              return;
      }
      #endif
      
      static int devpts_remount(struct super_block *sb, int *flags, char *data)
      {
              int err;
              struct pts_fs_info *fsi = DEVPTS_SB(sb);
              struct pts_mount_opts *opts = &fsi->mount_opts;
      
              sync_filesystem(sb);
              err = parse_mount_options(data, PARSE_REMOUNT, opts);
      
              /*
               * parse_mount_options() restores options to default values
               * before parsing and may have changed ptmxmode. So, update the
               * mode in the inode too. Bogus options don't fail the remount,
               * so do this even on error return.
               */
              update_ptmx_mode(fsi);
      
              return err;
      }
      
      static int devpts_show_options(struct seq_file *seq, struct dentry *root)
      {
               struct pts_fs_info *fsi = DEVPTS_SB(root->d_sb);
              struct pts_mount_opts *opts = &fsi->mount_opts;
      
              if (opts->setuid)
                      seq_printf(seq, ",uid=%u",
                                 from_kuid_munged(&init_user_ns, opts->uid));
               if (opts->setgid)
                       seq_printf(seq, ",gid=%u",
                                  from_kgid_munged(&init_user_ns, opts->gid));
               seq_printf(seq, ",mode=%03o", opts->mode);
      #ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
              seq_printf(seq, ",ptmxmode=%03o", opts->ptmxmode);
              if (opts->max < NR_UNIX98_PTY_MAX)
                      seq_printf(seq, ",max=%d", opts->max);
      #endif
      
              return 0;
      }
      
      static const struct super_operations devpts_sops = {
              .statfs                = simple_statfs,
              .remount_fs        = devpts_remount,
              .show_options        = devpts_show_options,
      };
      
      static void *new_pts_fs_info(struct super_block *sb)
      {
              struct pts_fs_info *fsi;
      
              fsi = kzalloc(sizeof(struct pts_fs_info), GFP_KERNEL);
              if (!fsi)
                      return NULL;
      
              ida_init(&fsi->allocated_ptys);
              fsi->mount_opts.mode = DEVPTS_DEFAULT_MODE;
              fsi->mount_opts.ptmxmode = DEVPTS_DEFAULT_PTMX_MODE;
              fsi->sb = sb;
      
              return fsi;
      }
      
      static int
      devpts_fill_super(struct super_block *s, void *data, int silent)
      {
              struct inode *inode;
      
              s->s_blocksize = 1024;
              s->s_blocksize_bits = 10;
              s->s_magic = DEVPTS_SUPER_MAGIC;
              s->s_op = &devpts_sops;
              s->s_time_gran = 1;
      
              s->s_fs_info = new_pts_fs_info(s);
              if (!s->s_fs_info)
                      goto fail;
      
              inode = new_inode(s);
              if (!inode)
                      goto fail;
              inode->i_ino = 1;
              inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
              inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
              inode->i_op = &simple_dir_inode_operations;
              inode->i_fop = &simple_dir_operations;
              set_nlink(inode, 2);
      
              s->s_root = d_make_root(inode);
              if (s->s_root)
                      return 0;
      
              pr_err("get root dentry failed\n");
      
      fail:
              return -ENOMEM;
      }
      
      #ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
      static int compare_init_pts_sb(struct super_block *s, void *p)
      {
              if (devpts_mnt)
                      return devpts_mnt->mnt_sb == s;
              return 0;
      }
      
      /*
       * devpts_mount()
       *
       *     If the '-o newinstance' mount option was specified, mount a new
       *     (private) instance of devpts.  PTYs created in this instance are
       *     independent of the PTYs in other devpts instances.
       *
       *     If the '-o newinstance' option was not specified, mount/remount the
       *     initial kernel mount of devpts.  This type of mount gives the
       *     legacy, single-instance semantics.
       *
       *     The 'newinstance' option is needed to support multiple namespace
       *     semantics in devpts while preserving backward compatibility of the
       *     current 'single-namespace' semantics. i.e all mounts of devpts
       *     without the 'newinstance' mount option should bind to the initial
       *     kernel mount, like mount_single().
       *
       *     Mounts with 'newinstance' option create a new, private namespace.
       *
       *     NOTE:
       *
       *     For single-mount semantics, devpts cannot use mount_single(),
       *     because mount_single()/sget() find and use the super-block from
       *     the most recent mount of devpts. But that recent mount may be a
       *     'newinstance' mount and mount_single() would pick the newinstance
       *     super-block instead of the initial super-block.
       */
      static struct dentry *devpts_mount(struct file_system_type *fs_type,
              int flags, const char *dev_name, void *data)
      {
              int error;
              struct pts_mount_opts opts;
              struct super_block *s;
      
              error = parse_mount_options(data, PARSE_MOUNT, &opts);
              if (error)
                      return ERR_PTR(error);
      
              /* Require newinstance for all user namespace mounts to ensure
               * the mount options are not changed.
               */
              if ((current_user_ns() != &init_user_ns) && !opts.newinstance)
                      return ERR_PTR(-EINVAL);
      
              if (opts.newinstance)
                      s = sget(fs_type, NULL, set_anon_super, flags, NULL);
              else
                      s = sget(fs_type, compare_init_pts_sb, set_anon_super, flags,
                               NULL);
      
              if (IS_ERR(s))
                      return ERR_CAST(s);
      
              if (!s->s_root) {
                      error = devpts_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
                      if (error)
                              goto out_undo_sget;
                      s->s_flags |= MS_ACTIVE;
              }
      
              memcpy(&(DEVPTS_SB(s))->mount_opts, &opts, sizeof(opts));
      
              error = mknod_ptmx(s);
              if (error)
                      goto out_undo_sget;
      
              return dget(s->s_root);
      
      out_undo_sget:
              deactivate_locked_super(s);
              return ERR_PTR(error);
      }
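
       /*
        * Typical invocations of the two flavours described above
        * (illustrative shell commands, not part of this file):
        *
        *        mount -t devpts devpts /dev/pts                 # initial instance
        *        mount -t devpts -o newinstance devpts /dev/pts  # private instance
        *
        * Container runtimes use the second form so that slave indices
        * allocated inside the container do not collide with the host's.
        */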
      
      #else
      /*
       * This supports only the legacy single-instance semantics (no
       * multiple-instance semantics)
       */
      static struct dentry *devpts_mount(struct file_system_type *fs_type, int flags,
                      const char *dev_name, void *data)
      {
              return mount_single(fs_type, flags, data, devpts_fill_super);
      }
      #endif
      
      static void devpts_kill_sb(struct super_block *sb)
      {
              struct pts_fs_info *fsi = DEVPTS_SB(sb);
      
              ida_destroy(&fsi->allocated_ptys);
              kfree(fsi);
              kill_litter_super(sb);
      }
      
      static struct file_system_type devpts_fs_type = {
              .name                = "devpts",
              .mount                = devpts_mount,
              .kill_sb        = devpts_kill_sb,
      #ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
              .fs_flags        = FS_USERNS_MOUNT | FS_USERNS_DEV_MOUNT,
      #endif
      };
      
      /*
       * The normal naming convention is simply /dev/pts/<number>; this conforms
       * to the System V naming convention
       */
      
      int devpts_new_index(struct pts_fs_info *fsi)
      {
              int index;
              int ida_ret;
      
               if (!fsi)
                       return -ENODEV;

       retry:
               /* Preload the IDA; ida_get_new() below may still race and fail. */
               if (!ida_pre_get(&fsi->allocated_ptys, GFP_KERNEL))
                       return -ENOMEM;

               mutex_lock(&allocated_ptys_lock);
               if (pty_count >= pty_limit -
                               (fsi->mount_opts.newinstance ? pty_reserve : 0)) {
                       mutex_unlock(&allocated_ptys_lock);
                       return -ENOSPC;
               }

               ida_ret = ida_get_new(&fsi->allocated_ptys, &index);
               if (ida_ret < 0) {
                       mutex_unlock(&allocated_ptys_lock);
                       /* -EAGAIN means the preallocated node was consumed: retry */
                       if (ida_ret == -EAGAIN)
                               goto retry;
                       return -EIO;
               }

               if (index >= fsi->mount_opts.max) {
                       ida_remove(&fsi->allocated_ptys, index);
                       mutex_unlock(&allocated_ptys_lock);
                       return -ENOSPC;
               }
               pty_count++;
               mutex_unlock(&allocated_ptys_lock);
               return index;
       }
      
      void devpts_kill_index(struct pts_fs_info *fsi, int idx)
      {
               mutex_lock(&allocated_ptys_lock);
              ida_remove(&fsi->allocated_ptys, idx);
              pty_count--;
              mutex_unlock(&allocated_ptys_lock);
      }
      
      /*
       * pty code needs to hold extra references in case of last /dev/tty close
       */
      struct pts_fs_info *devpts_get_ref(struct inode *ptmx_inode, struct file *file)
      {
              struct super_block *sb;
              struct pts_fs_info *fsi;
      
               sb = pts_sb_from_inode(ptmx_inode);
               if (!sb)
                       return NULL;
               fsi = DEVPTS_SB(sb);
               if (!fsi)
                       return NULL;

               atomic_inc(&sb->s_active);
               return fsi;
      }
      
      void devpts_put_ref(struct pts_fs_info *fsi)
      {
               deactivate_super(fsi->sb);
      }
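
       /*
        * Note the pairing: devpts_get_ref() bumps sb->s_active by hand, so
        * the matching devpts_put_ref() must drop it via deactivate_super().
        * Callers in the pty layer are expected to hold the reference for
        * the lifetime of the master file.
        */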
      
      /**
       * devpts_pty_new -- create a new inode in /dev/pts/
       * @ptmx_inode: inode of the master
       * @device: major+minor of the node to be created
       * @index: used as a name of the node
       * @priv: what's given back by devpts_get_priv
       *
        * The created inode is returned. Remove it from /dev/pts/ with devpts_pty_kill().
       */
      struct inode *devpts_pty_new(struct pts_fs_info *fsi, dev_t device, int index,
                      void *priv)
      {
              struct dentry *dentry;
              struct super_block *sb;
              struct inode *inode;
              struct dentry *root;
              struct pts_mount_opts *opts;
              char s[12];
      
               if (!fsi)
                       return ERR_PTR(-ENODEV);

               sb = fsi->sb;
               root = sb->s_root;
               opts = &fsi->mount_opts;

               inode = new_inode(sb);
               if (!inode)
                       return ERR_PTR(-ENOMEM);

               inode->i_ino = index + 3;
               inode->i_uid = opts->setuid ? opts->uid : current_fsuid();
               inode->i_gid = opts->setgid ? opts->gid : current_fsgid();
               inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
               init_special_inode(inode, S_IFCHR|opts->mode, device);
               inode->i_private = priv;

               sprintf(s, "%d", index);

               mutex_lock(&d_inode(root)->i_mutex);

               dentry = d_alloc_name(root, s);
               if (dentry) {
                       d_add(dentry, inode);
                       fsnotify_create(d_inode(root), dentry);
               } else {
                       iput(inode);
                       inode = ERR_PTR(-ENOMEM);
               }

               mutex_unlock(&d_inode(root)->i_mutex);

               return inode;
      }
      
      /**
       * devpts_get_priv -- get private data for a slave
       * @pts_inode: inode of the slave
       *
       * Returns whatever was passed as priv in devpts_pty_new for a given inode.
       */
      void *devpts_get_priv(struct inode *pts_inode)
      {
              struct dentry *dentry;
              void *priv = NULL;
      
               BUG_ON(pts_inode->i_rdev == MKDEV(TTYAUX_MAJOR, PTMX_MINOR));

               /* Ensure dentry has not been deleted by devpts_pty_kill() */
               dentry = d_find_alias(pts_inode);
               if (!dentry)
                       return NULL;

               if (pts_inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC)
                       priv = pts_inode->i_private;

               dput(dentry);

               return priv;
      }
      
       /**
        * devpts_pty_kill -- remove inode from /dev/pts/
        * @inode: inode of the slave to be removed
        *
        * This is the inverse operation of devpts_pty_new.
        */
      void devpts_pty_kill(struct inode *inode)
      {
               struct super_block *sb = pts_sb_from_inode(inode);
               struct dentry *root = sb->s_root;
               struct dentry *dentry;

               BUG_ON(inode->i_rdev == MKDEV(TTYAUX_MAJOR, PTMX_MINOR));

               mutex_lock(&d_inode(root)->i_mutex);
      
              dentry = d_find_alias(inode);
      
              drop_nlink(inode);
              d_delete(dentry);
              dput(dentry);        /* d_alloc_name() in devpts_pty_new() */
              dput(dentry);                /* d_find_alias above */
      
              mutex_unlock(&d_inode(root)->i_mutex);
      }
      
      static int __init init_devpts_fs(void)
      {
              int err = register_filesystem(&devpts_fs_type);
              struct ctl_table_header *table;
      
              if (!err) {
                      struct vfsmount *mnt;
      
                      table = register_sysctl_table(pty_root_table);
                      mnt = kern_mount(&devpts_fs_type);
                      if (IS_ERR(mnt)) {
                              err = PTR_ERR(mnt);
                              unregister_filesystem(&devpts_fs_type);
                              unregister_sysctl_table(table);
                      } else {
                              devpts_mnt = mnt;
                      }
              }
              return err;
      }
      module_init(init_devpts_fs)
      /*
       * INET                An implementation of the TCP/IP protocol suite for the LINUX
       *                operating system.  INET is implemented using the  BSD Socket
       *                interface as the means of communication with the user level.
       *
       *                Implementation of the Transmission Control Protocol(TCP).
       *
       * Authors:        Ross Biro
       *                Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
       *                Mark Evans, <evansmp@uhura.aston.ac.uk>
       *                Corey Minyard <wf-rch!minyard@relay.EU.net>
       *                Florian La Roche, <flla@stud.uni-sb.de>
       *                Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
       *                Linus Torvalds, <torvalds@cs.helsinki.fi>
       *                Alan Cox, <gw4pts@gw4pts.ampr.org>
       *                Matthew Dillon, <dillon@apollo.west.oic.com>
       *                Arnt Gulbrandsen, <agulbra@nvg.unit.no>
       *                Jorge Cwik, <jorge@laser.satlink.net>
       */
      
      #include <linux/module.h>
      #include <linux/gfp.h>
      #include <net/tcp.h>
      
      int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES;
      int sysctl_tcp_synack_retries __read_mostly = TCP_SYNACK_RETRIES;
      int sysctl_tcp_keepalive_time __read_mostly = TCP_KEEPALIVE_TIME;
      int sysctl_tcp_keepalive_probes __read_mostly = TCP_KEEPALIVE_PROBES;
      int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL;
      int sysctl_tcp_retries1 __read_mostly = TCP_RETR1;
      int sysctl_tcp_retries2 __read_mostly = TCP_RETR2;
      int sysctl_tcp_orphan_retries __read_mostly;
      int sysctl_tcp_thin_linear_timeouts __read_mostly;
      
      static void tcp_write_err(struct sock *sk)
      {
              sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
              sk->sk_error_report(sk);
      
              tcp_done(sk);
              NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONTIMEOUT);
      }
      
      /* Do not allow orphaned sockets to eat all our resources.
       * This is direct violation of TCP specs, but it is required
       * to prevent DoS attacks. It is called when a retransmission timeout
       * or zero probe timeout occurs on orphaned socket.
       *
       * Also close if our net namespace is exiting; in that case there is no
       * hope of ever communicating again since all netns interfaces are already
       * down (or about to be down), and we need to release our dst references,
       * which have been moved to the netns loopback interface, so the namespace
       * can finish exiting.  This condition is only possible if we are a kernel
       * socket, as those do not hold references to the namespace.
       *
        * These criteria are not yet confirmed experimentally and may change.
        * We kill the socket if:
        * 1. The number of orphaned sockets exceeds an administratively configured
        *    limit.
        * 2. We are under strong memory pressure.
        * 3. Our net namespace is exiting.
        */
      static int tcp_out_of_resources(struct sock *sk, bool do_reset)
      {
              struct tcp_sock *tp = tcp_sk(sk);
              int shift = 0;
      
              /* If peer does not open window for long time, or did not transmit
               * anything for long time, penalize it. */
              if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset)
                      shift++;
      
              /* If some dubious ICMP arrived, penalize even more. */
              if (sk->sk_err_soft)
                      shift++;
      
              if (tcp_check_oom(sk, shift)) {
                      /* Catch exceptional cases, when connection requires reset.
                       *      1. Last segment was sent recently. */
                      if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN ||
                          /*  2. Window is closed. */
                          (!tp->snd_wnd && !tp->packets_out))
                              do_reset = true;
                      if (do_reset)
                              tcp_send_active_reset(sk, GFP_ATOMIC);
                      tcp_done(sk);
                      NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY);
                      return 1;
              }
      
              if (!check_net(sock_net(sk))) {
                      /* Not possible to send reset; just close */
                      tcp_done(sk);
                      return 1;
              }
      
              return 0;
      }
      
       /* Calculate the maximal number of retries on an orphaned socket. */
      static int tcp_orphan_retries(struct sock *sk, bool alive)
      {
              int retries = sysctl_tcp_orphan_retries; /* May be zero. */
      
              /* We know from an ICMP that something is wrong. */
              if (sk->sk_err_soft && !alive)
                      retries = 0;
      
              /* However, if socket sent something recently, select some safe
               * number of retries. 8 corresponds to >100 seconds with minimal
               * RTO of 200msec. */
              if (retries == 0 && alive)
                      retries = 8;
              return retries;
      }
      
      static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
      {
              struct net *net = sock_net(sk);
      
              /* Black hole detection */
              if (net->ipv4.sysctl_tcp_mtu_probing) {
                      if (!icsk->icsk_mtup.enabled) {
                              icsk->icsk_mtup.enabled = 1;
                              icsk->icsk_mtup.probe_timestamp = tcp_time_stamp;
                              tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
                      } else {
                              struct net *net = sock_net(sk);
                              struct tcp_sock *tp = tcp_sk(sk);
                              int mss;
      
                              mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
                              mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
                              mss = max(mss, 68 - tp->tcp_header_len);
                              mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss);
                              icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
                              tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
                      }
              }
      }
      
      /* This function calculates a "timeout" which is equivalent to the timeout of a
       * TCP connection after "boundary" unsuccessful, exponentially backed-off
       * retransmissions with an initial RTO of TCP_RTO_MIN or TCP_TIMEOUT_INIT if
       * syn_set flag is set.
       */
      static bool retransmits_timed_out(struct sock *sk,
                                        unsigned int boundary,
                                        unsigned int timeout,
                                        bool syn_set)
      {
               unsigned int linear_backoff_thresh, start_ts;
               unsigned int rto_base = syn_set ? TCP_TIMEOUT_INIT : TCP_RTO_MIN;

               if (!inet_csk(sk)->icsk_retransmits)
                       return false;

               start_ts = tcp_sk(sk)->retrans_stamp;
               if (unlikely(!start_ts))
                       start_ts = tcp_skb_timestamp(tcp_write_queue_head(sk));

               if (likely(timeout == 0)) {
                       linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base);

                       if (boundary <= linear_backoff_thresh)
                               timeout = ((2 << boundary) - 1) * rto_base;
                       else
                               timeout = ((2 << linear_backoff_thresh) - 1) * rto_base +
                                       (boundary - linear_backoff_thresh) * TCP_RTO_MAX;
               }
               return (tcp_time_stamp - start_ts) >= timeout;
      }
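
       /*
        * Worked example with the usual constants (TCP_RTO_MIN = 200ms,
        * TCP_RTO_MAX = 120s): linear_backoff_thresh = ilog2(120000/200) =
        * ilog2(600) = 9.  With boundary = 15, the default value of
        * sysctl_tcp_retries2, the else branch applies:
        *
        *        timeout = ((2 << 9) - 1) * 200ms + (15 - 9) * 120s
        *                = 204.6s + 720s ~= 924.6s
        *
        * i.e. roughly 15 minutes of exponential backoff before the final
        * write timeout fires.
        */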
      
      /* A write timeout has occurred. Process the after effects. */
      static int tcp_write_timeout(struct sock *sk)
      {
              struct inet_connection_sock *icsk = inet_csk(sk);
              struct tcp_sock *tp = tcp_sk(sk);
              int retry_until;
              bool do_reset, syn_set = false;

               if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
                      if (icsk->icsk_retransmits) {
                              dst_negative_advice(sk);
                              if (tp->syn_fastopen || tp->syn_data)
                                      tcp_fastopen_cache_set(sk, 0, NULL, true, 0);
                              if (tp->syn_data && icsk->icsk_retransmits == 1)
                                      NET_INC_STATS_BH(sock_net(sk),
                                                       LINUX_MIB_TCPFASTOPENACTIVEFAIL);
                      }
                      retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
                      syn_set = true;
               } else {
                      if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0, 0)) {
                              /* Some middle-boxes may black-hole Fast Open _after_
                               * the handshake. Therefore we conservatively disable
                               * Fast Open on this path on recurring timeouts with
                               * few or zero bytes acked after Fast Open.
                               */
                              if (tp->syn_data_acked &&
                                  tp->bytes_acked <= tp->rx_opt.mss_clamp) {
                                      tcp_fastopen_cache_set(sk, 0, NULL, true, 0);
                                      if (icsk->icsk_retransmits == sysctl_tcp_retries1)
                                              NET_INC_STATS_BH(sock_net(sk),
                                                               LINUX_MIB_TCPFASTOPENACTIVEFAIL);
                              }
                              /* Black hole detection */
                              tcp_mtu_probing(icsk, sk);
      
                              dst_negative_advice(sk);
                      }

                       retry_until = sysctl_tcp_retries2;
                      if (sock_flag(sk, SOCK_DEAD)) {
                              const bool alive = icsk->icsk_rto < TCP_RTO_MAX;
      
                              retry_until = tcp_orphan_retries(sk, alive);
                              do_reset = alive ||
                                      !retransmits_timed_out(sk, retry_until, 0, 0);
      
                              if (tcp_out_of_resources(sk, do_reset))
                                      return 1;
                      }
              }

               if (retransmits_timed_out(sk, retry_until,
                                        syn_set ? 0 : icsk->icsk_user_timeout, syn_set)) {
                      /* Has it gone just too far? */
                      tcp_write_err(sk);
                      return 1;
              }
              return 0;
      }
      
      void tcp_delack_timer_handler(struct sock *sk)
      {
              struct tcp_sock *tp = tcp_sk(sk);
              struct inet_connection_sock *icsk = inet_csk(sk);

               sk_mem_reclaim_partial(sk);

               if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
                   !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
                       goto out;

               if (time_after(icsk->icsk_ack.timeout, jiffies)) {
                       sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout);
                       goto out;
               }
               icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;

               if (!skb_queue_empty(&tp->ucopy.prequeue)) {
                       struct sk_buff *skb;

                       NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSCHEDULERFAILED);

                       while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
                               sk_backlog_rcv(sk, skb);

                       tp->ucopy.memory = 0;
               }

               if (inet_csk_ack_scheduled(sk)) {
                       if (!icsk->icsk_ack.pingpong) {
                               /* Delayed ACK missed: inflate ATO. */
                               icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto);
                       } else {
                               /* Delayed ACK missed: leave pingpong mode and
                                * deflate ATO.
                                */
                               icsk->icsk_ack.pingpong = 0;
                               icsk->icsk_ack.ato      = TCP_ATO_MIN;
                       }
                       tcp_send_ack(sk);
                       NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKS);
               }

       out:
               if (tcp_under_memory_pressure(sk))
                       sk_mem_reclaim(sk);
      }
      
      static void tcp_delack_timer(unsigned long data)
      {
              struct sock *sk = (struct sock *)data;
      
              bh_lock_sock(sk);
              if (!sock_owned_by_user(sk)) {
                      tcp_delack_timer_handler(sk);
              } else {
                      inet_csk(sk)->icsk_ack.blocked = 1;
                      NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
                       /* delegate our work to tcp_release_cb() */
                      if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags))
                              sock_hold(sk);
              }
              bh_unlock_sock(sk);
              sock_put(sk);
      }
      
      static void tcp_probe_timer(struct sock *sk)
      {
              struct inet_connection_sock *icsk = inet_csk(sk);
              struct tcp_sock *tp = tcp_sk(sk);
              int max_probes;
              u32 start_ts;

               if (tp->packets_out || !tcp_send_head(sk)) {
                      icsk->icsk_probes_out = 0;
                      return;
              }
      
              /* RFC 1122 4.2.2.17 requires the sender to stay open indefinitely as
               * long as the receiver continues to respond probes. We support this by
               * default and reset icsk_probes_out with incoming ACKs. But if the
               * socket is orphaned or the user specifies TCP_USER_TIMEOUT, we
               * kill the socket when the retry count and the time exceeds the
               * corresponding system limit. We also implement similar policy when
               * we use RTO to probe window in tcp_retransmit_timer().
                */
              start_ts = tcp_skb_timestamp(tcp_send_head(sk));
               if (!start_ts)
                       skb_mstamp_get(&tcp_send_head(sk)->skb_mstamp);
              else if (icsk->icsk_user_timeout &&
                       (s32)(tcp_time_stamp - start_ts) > icsk->icsk_user_timeout)
                      goto abort;

               max_probes = sysctl_tcp_retries2;
              if (sock_flag(sk, SOCK_DEAD)) {
                      const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;
      
                      max_probes = tcp_orphan_retries(sk, alive);
                      if (!alive && icsk->icsk_backoff >= max_probes)
                              goto abort;
                      if (tcp_out_of_resources(sk, true))
                              return;
              }

               if (icsk->icsk_probes_out > max_probes) {
       abort:                tcp_write_err(sk);
               } else {
                       /* Only send another probe if we didn't close things up. */
                      tcp_send_probe0(sk);
              }
      }
      
      /*
       *        Timer for Fast Open socket to retransmit SYNACK. Note that the
       *        sk here is the child socket, not the parent (listener) socket.
       */
      static void tcp_fastopen_synack_timer(struct sock *sk)
      {
              struct inet_connection_sock *icsk = inet_csk(sk);
              int max_retries = icsk->icsk_syn_retries ? :
                  sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */
              struct request_sock *req;
      
              req = tcp_sk(sk)->fastopen_rsk;
              req->rsk_ops->syn_ack_timeout(req);
      
              if (req->num_timeout >= max_retries) {
                      tcp_write_err(sk);
                      return;
              }
        /* XXX (TFO) - Unlike a regular SYN-ACK retransmit, we ignore the error
         * returned from rtx_syn_ack() to make it more persistent, like a
         * regular retransmit, because if the child socket has been accepted
         * it's not good to give up too easily.
         */
              inet_rtx_syn_ack(sk, req);
              req->num_timeout++;
              inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
                                TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
      }
      
      /*
       *        The TCP retransmit timer.
       */
    2 
      void tcp_retransmit_timer(struct sock *sk)
      {
              struct tcp_sock *tp = tcp_sk(sk);
              struct inet_connection_sock *icsk = inet_csk(sk);
    2 
              if (tp->fastopen_rsk) {
                      WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
                                   sk->sk_state != TCP_FIN_WAIT1);
                      tcp_fastopen_synack_timer(sk);
                      /* Before we receive ACK to our SYN-ACK don't retransmit
                       * anything else (e.g., data or FIN segments).
                       */
                      return;
    2         }
              if (!tp->packets_out)
                      goto out;
    2 
              WARN_ON(tcp_write_queue_empty(sk));
    2 
              tp->tlp_high_seq = 0;
      
              if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) &&
                  !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {
                /* The receiver dastardly shrinks the window. Our retransmits
                 * become zero-window probes, but we should not time out this
                 * connection. If the socket is an orphan, time it out;
                 * we cannot allow such beasts to hang around indefinitely.
                 */
                      struct inet_sock *inet = inet_sk(sk);
                      if (sk->sk_family == AF_INET) {
                              net_dbg_ratelimited("Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
                                                  &inet->inet_daddr,
                                                  ntohs(inet->inet_dport),
                                                  inet->inet_num,
                                                  tp->snd_una, tp->snd_nxt);
                      }
      #if IS_ENABLED(CONFIG_IPV6)
                      else if (sk->sk_family == AF_INET6) {
                              net_dbg_ratelimited("Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
                                                  &sk->sk_v6_daddr,
                                                  ntohs(inet->inet_dport),
                                                  inet->inet_num,
                                                  tp->snd_una, tp->snd_nxt);
                      }
      #endif
                      if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) {
                              tcp_write_err(sk);
                              goto out;
                      }
                      tcp_enter_loss(sk);
                      tcp_retransmit_skb(sk, tcp_write_queue_head(sk));
                      __sk_dst_reset(sk);
                      goto out_reset_timer;
              }
    2 
              if (tcp_write_timeout(sk))
                      goto out;
    2 
              if (icsk->icsk_retransmits == 0) {
                      int mib_idx;
    2 
    2                 if (icsk->icsk_ca_state == TCP_CA_Recovery) {
                              if (tcp_is_sack(tp))
                                      mib_idx = LINUX_MIB_TCPSACKRECOVERYFAIL;
                              else
                                      mib_idx = LINUX_MIB_TCPRENORECOVERYFAIL;
                      } else if (icsk->icsk_ca_state == TCP_CA_Loss) {
                              mib_idx = LINUX_MIB_TCPLOSSFAILURES;
                      } else if ((icsk->icsk_ca_state == TCP_CA_Disorder) ||
                                 tp->sacked_out) {
                              if (tcp_is_sack(tp))
                                      mib_idx = LINUX_MIB_TCPSACKFAILURES;
                              else
                                      mib_idx = LINUX_MIB_TCPRENOFAILURES;
                      } else {
                              mib_idx = LINUX_MIB_TCPTIMEOUTS;
    2                 }
                      NET_INC_STATS_BH(sock_net(sk), mib_idx);
              }
    2 
              tcp_enter_loss(sk);
    2 
              if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk)) > 0) {
                      /* Retransmission failed because of local congestion,
                       * do not backoff.
                       */
                      if (!icsk->icsk_retransmits)
                              icsk->icsk_retransmits = 1;
                      inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
                                                min(icsk->icsk_rto, TCP_RESOURCE_PROBE_INTERVAL),
                                                TCP_RTO_MAX);
                      goto out;
              }
      
              /* Increase the timeout each time we retransmit.  Note that
               * we do not increase the rtt estimate.  rto is initialized
               * from rtt, but increases here.  Jacobson (SIGCOMM 88) suggests
               * that doubling rto each time is the least we can get away with.
               * In KA9Q, Karn uses this for the first few times, and then
         * goes to quadratic.  NetBSD doubles, but only goes up to *64,
               * and clamps at 1 to 64 sec afterwards.  Note that 120 sec is
               * defined in the protocol as the maximum possible RTT.  I guess
               * we'll have to use something other than TCP to talk to the
               * University of Mars.
               *
               * PAWS allows us longer timeouts and large windows, so once
               * implemented ftp to mars will work nicely. We will have to fix
               * the 120 second clamps though!
    2          */
              icsk->icsk_backoff++;
              icsk->icsk_retransmits++;
      
      out_reset_timer:
        /* If the stream is thin, use linear timeouts. Since 'icsk_backoff' is
         * used to reset the timer, set it to 0. Recalculate 'icsk_rto', as it
         * might have been increased if the stream oscillates between thin and
         * thick; the old value might already be too high compared to the value
         * set by 'tcp_set_rto' in tcp_input.c, which resets the rto without
         * backoff. Limit to TCP_THIN_LINEAR_RETRIES before initiating
         * exponential backoff behaviour, to avoid continuing to hammer
         * linear-timeout retransmissions into a black hole.
    2          */
    2         if (sk->sk_state == TCP_ESTABLISHED &&
                  (tp->thin_lto || sysctl_tcp_thin_linear_timeouts) &&
                  tcp_stream_is_thin(tp) &&
                  icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
                      icsk->icsk_backoff = 0;
                      icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX);
              } else {
    2                 /* Use normal (exponential) backoff */
                      icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
    2         }
              inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
              if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1, 0, 0))
                      __sk_dst_reset(sk);
    2 
      out:;
      }
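
/*
 * The backoff schedule above, in isolation (a sketch; 'example_next_rto'
 * is a name local to this illustration, not a kernel symbol): on each
 * timeout the RTO doubles and is clamped to TCP_RTO_MAX, so a 200 ms
 * initial RTO yields 200, 400, 800, ... ms until the 120 s clamp; thin
 * streams within TCP_THIN_LINEAR_RETRIES keep the RTO linear instead.
 */
#if 0 /* illustrative sketch, not part of the retransmit path */
static unsigned long example_next_rto(unsigned long rto, bool thin_linear)
{
        if (thin_linear)
                return rto;                             /* linear timeouts */
        return min_t(unsigned long, rto << 1, TCP_RTO_MAX); /* exponential, clamped */
}
#endif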
      
      void tcp_write_timer_handler(struct sock *sk)
      {
              struct inet_connection_sock *icsk = inet_csk(sk);
              int event;
   94 
   93         if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
                  !icsk->icsk_pending)
                      goto out;
   90 
   72         if (time_after(icsk->icsk_timeout, jiffies)) {
                      sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout);
                      goto out;
              }
      
              event = icsk->icsk_pending;
   21 
              switch (event) {
    2         case ICSK_TIME_EARLY_RETRANS:
                      tcp_resume_early_retransmit(sk);
                      break;
   14         case ICSK_TIME_LOSS_PROBE:
                      tcp_send_loss_probe(sk);
                      break;
    2         case ICSK_TIME_RETRANS:
                      icsk->icsk_pending = 0;
                      tcp_retransmit_timer(sk);
                      break;
    3         case ICSK_TIME_PROBE0:
    3                 icsk->icsk_pending = 0;
                      tcp_probe_timer(sk);
                      break;
              }
      
   94 out:
   94         sk_mem_reclaim(sk);
      }
      
      static void tcp_write_timer(unsigned long data)
      {
              struct sock *sk = (struct sock *)data;
      
              bh_lock_sock(sk);
              if (!sock_owned_by_user(sk)) {
                      tcp_write_timer_handler(sk);
              } else {
                /* delegate our work to tcp_release_cb() */
                      if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags))
                              sock_hold(sk);
              }
              bh_unlock_sock(sk);
              sock_put(sk);
      }
      
      void tcp_syn_ack_timeout(const struct request_sock *req)
      {
              struct net *net = read_pnet(&inet_rsk(req)->ireq_net);
      
              NET_INC_STATS_BH(net, LINUX_MIB_TCPTIMEOUTS);
      }
      EXPORT_SYMBOL(tcp_syn_ack_timeout);
      
      void tcp_set_keepalive(struct sock *sk, int val)
    5 {
              if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))
                      return;
    4 
    3         if (val && !sock_flag(sk, SOCK_KEEPOPEN))
                      inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk)));
    2         else if (!val)
                      inet_csk_delete_keepalive_timer(sk);
      }
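
/*
 * Userspace view of the path above (a minimal sketch; values are
 * examples only): SO_KEEPALIVE arms the keepalive timer, while the
 * TCP_KEEPIDLE/TCP_KEEPINTVL/TCP_KEEPCNT options feed the
 * keepalive_time_when()/keepalive_intvl_when()/keepalive_probes()
 * helpers used by tcp_keepalive_timer() below.
 */
#if 0 /* userspace illustration, not kernel code */
#include <netinet/tcp.h>
#include <sys/socket.h>

static void enable_keepalive(int fd)
{
        int on = 1, idle = 60, intvl = 10, cnt = 5;

        setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
        /* seconds of idle time before the first probe */
        setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &idle, sizeof(idle));
        /* seconds between unanswered probes */
        setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &intvl, sizeof(intvl));
        /* unanswered probes before the connection is reset */
        setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &cnt, sizeof(cnt));
}
#endif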
      
      
      static void tcp_keepalive_timer (unsigned long data)
      {
              struct sock *sk = (struct sock *) data;
              struct inet_connection_sock *icsk = inet_csk(sk);
              struct tcp_sock *tp = tcp_sk(sk);
              u32 elapsed;
      
              /* Only process if socket is not in use. */
              bh_lock_sock(sk);
              if (sock_owned_by_user(sk)) {
                      /* Try again later. */
                      inet_csk_reset_keepalive_timer (sk, HZ/20);
                      goto out;
              }
      
              if (sk->sk_state == TCP_LISTEN) {
                      pr_err("Hmm... keepalive on a LISTEN ???\n");
                      goto out;
              }
      
              if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
                      if (tp->linger2 >= 0) {
                              const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN;
      
                              if (tmo > 0) {
                                      tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
                                      goto out;
                              }
                      }
                      tcp_send_active_reset(sk, GFP_ATOMIC);
                      goto death;
              }
      
              if (!sock_flag(sk, SOCK_KEEPOPEN) ||
                  ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)))
                      goto out;
      
              elapsed = keepalive_time_when(tp);
      
              /* It is alive without keepalive 8) */
              if (tp->packets_out || tcp_send_head(sk))
                      goto resched;
      
              elapsed = keepalive_time_elapsed(tp);
      
              if (elapsed >= keepalive_time_when(tp)) {
                      /* If the TCP_USER_TIMEOUT option is enabled, use that
                       * to determine when to timeout instead.
                       */
                      if ((icsk->icsk_user_timeout != 0 &&
                          elapsed >= icsk->icsk_user_timeout &&
                          icsk->icsk_probes_out > 0) ||
                          (icsk->icsk_user_timeout == 0 &&
                          icsk->icsk_probes_out >= keepalive_probes(tp))) {
                              tcp_send_active_reset(sk, GFP_ATOMIC);
                              tcp_write_err(sk);
                              goto out;
                      }
                      if (tcp_write_wakeup(sk, LINUX_MIB_TCPKEEPALIVE) <= 0) {
                              icsk->icsk_probes_out++;
                              elapsed = keepalive_intvl_when(tp);
                      } else {
                              /* If keepalive was lost due to local congestion,
                               * try harder.
                               */
                              elapsed = TCP_RESOURCE_PROBE_INTERVAL;
                      }
              } else {
                      /* It is tp->rcv_tstamp + keepalive_time_when(tp) */
                      elapsed = keepalive_time_when(tp) - elapsed;
              }
      
              sk_mem_reclaim(sk);
      
      resched:
              inet_csk_reset_keepalive_timer (sk, elapsed);
              goto out;
      
      death:
              tcp_done(sk);
      
      out:
              bh_unlock_sock(sk);
              sock_put(sk);
      }
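
/*
 * Worked example of the rescheduling arithmetic above: with a 60 s
 * keepalive time and 25 s already elapsed since the last receive, the
 * final else-branch re-arms the timer for 60 - 25 = 35 s, so the first
 * probe fires exactly when the idle period reaches keepalive_time_when().
 */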
      
      void tcp_init_xmit_timers(struct sock *sk)
  218 {
              inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,
                                        &tcp_keepalive_timer);
      }
      /* inflate.c -- zlib decompression
       * Copyright (C) 1995-2005 Mark Adler
       * For conditions of distribution and use, see copyright notice in zlib.h
       *
       * Based on zlib 1.2.3 but modified for the Linux Kernel by
       * Richard Purdie <richard@openedhand.com>
       *
       * Changes mainly for static instead of dynamic memory allocation
       *
       */
      
      #include <linux/zutil.h>
      #include "inftrees.h"
      #include "inflate.h"
      #include "inffast.h"
      #include "infutil.h"
      
      int zlib_inflate_workspacesize(void)
      {
    3     return sizeof(struct inflate_workspace);
      }
      
      int zlib_inflateReset(z_streamp strm)
      {
          struct inflate_state *state;
      
    3     if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
          state = (struct inflate_state *)strm->state;
    3     strm->total_in = strm->total_out = state->total = 0;
          strm->msg = NULL;
          strm->adler = 1;        /* to support ill-conceived Java test suite */
          state->mode = HEAD;
          state->last = 0;
          state->havedict = 0;
          state->dmax = 32768U;
          state->hold = 0;
          state->bits = 0;
          state->lencode = state->distcode = state->next = state->codes;
      
          /* Initialise Window */
          state->wsize = 1U << state->wbits;
          state->write = 0;
          state->whave = 0;
      
    3     return Z_OK;
      }
      
      int zlib_inflateInit2(z_streamp strm, int windowBits)
      {
          struct inflate_state *state;
      
    3     if (strm == NULL) return Z_STREAM_ERROR;
    3     strm->msg = NULL;                 /* in case we return an error */
      
          state = &WS(strm)->inflate_state;
          strm->state = (struct internal_state *)state;
      
          if (windowBits < 0) {
              state->wrap = 0;
    3         windowBits = -windowBits;
          }
          else {
    3         state->wrap = (windowBits >> 4) + 1;
          }
          if (windowBits < 8 || windowBits > 15) {
              return Z_STREAM_ERROR;
          }
    3     state->wbits = (unsigned)windowBits;
          state->window = &WS(strm)->working_window[0];
      
    3     return zlib_inflateReset(strm);
      }
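
/*
 * A minimal in-kernel usage sketch for the entry points above
 * ('example_inflate' is illustrative, not a kernel symbol; assumes
 * zlib-wrapped input, pass negative windowBits for raw deflate).
 * Real callers also handle partial output and Z_OK returns.
 */
#if 0 /* illustrative sketch */
static int example_inflate(void *dst, unsigned int dlen,
                           const void *src, unsigned int slen)
{
    struct z_stream_s strm;
    int ret;

    memset(&strm, 0, sizeof(strm));
    strm.workspace = vmalloc(zlib_inflate_workspacesize());
    if (!strm.workspace)
        return -ENOMEM;
    if (zlib_inflateInit2(&strm, MAX_WBITS) != Z_OK) {
        vfree(strm.workspace);
        return -EINVAL;
    }
    strm.next_in = src;
    strm.avail_in = slen;
    strm.next_out = dst;
    strm.avail_out = dlen;
    ret = zlib_inflate(&strm, Z_FINISH);   /* Z_STREAM_END on success */
    zlib_inflateEnd(&strm);
    vfree(strm.workspace);
    return ret == Z_STREAM_END ? 0 : -EIO;
}
#endif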
      
      /*
         Return state with length and distance decoding tables and index sizes set to
         fixed code decoding.  This returns fixed tables from inffixed.h.
       */
      static void zlib_fixedtables(struct inflate_state *state)
      {
      #   include "inffixed.h"
          state->lencode = lenfix;
          state->lenbits = 9;
          state->distcode = distfix;
          state->distbits = 5;
      }
      
      
      /*
         Update the window with the last wsize (normally 32K) bytes written before
         returning. This is only called when a window is already in use, or when
         output has been written during this inflate call, but the end of the deflate
   stream has not been reached yet. It is also called to copy dictionary data
   into the window when a dictionary is loaded.
      
         Providing output buffers larger than 32K to inflate() should provide a speed
         advantage, since only the last 32K of output is copied to the sliding window
         upon return from inflate(), and since all distances after the first 32K of
         output will fall in the output data, making match copies simpler and faster.
         The advantage may be dependent on the size of the processor's data caches.
       */
      static void zlib_updatewindow(z_streamp strm, unsigned out)
      {
          struct inflate_state *state;
          unsigned copy, dist;
      
          state = (struct inflate_state *)strm->state;
      
          /* copy state->wsize or less output bytes into the circular window */
          copy = out - strm->avail_out;
          if (copy >= state->wsize) {
              memcpy(state->window, strm->next_out - state->wsize, state->wsize);
              state->write = 0;
              state->whave = state->wsize;
          }
          else {
              dist = state->wsize - state->write;
              if (dist > copy) dist = copy;
              memcpy(state->window + state->write, strm->next_out - copy, dist);
              copy -= dist;
              if (copy) {
                  memcpy(state->window, strm->next_out - copy, copy);
                  state->write = copy;
                  state->whave = state->wsize;
              }
              else {
                  state->write += dist;
                  if (state->write == state->wsize) state->write = 0;
                  if (state->whave < state->wsize) state->whave += dist;
              }
          }
      }
      
      
      /*
       * At the end of a Deflate-compressed PPP packet, we expect to have seen
       * a `stored' block type value but not the (zero) length bytes.
       */
      /*
         Returns true if inflate is currently at the end of a block generated by
         Z_SYNC_FLUSH or Z_FULL_FLUSH. This function is used by one PPP
         implementation to provide an additional safety check. PPP uses
         Z_SYNC_FLUSH but removes the length bytes of the resulting empty stored
         block. When decompressing, PPP checks that at the end of input packet,
         inflate is waiting for these length bytes.
       */
      static int zlib_inflateSyncPacket(z_streamp strm)
      {
          struct inflate_state *state;
      
          if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
          state = (struct inflate_state *)strm->state;
      
          if (state->mode == STORED && state->bits == 0) {
              state->mode = TYPE;
              return Z_OK;
          }
          return Z_DATA_ERROR;
      }
      
      /* Macros for inflate(): */
      
      /* check function to use adler32() for zlib or crc32() for gzip */
      #define UPDATE(check, buf, len) zlib_adler32(check, buf, len)
      
      /* Load registers with state in inflate() for speed */
      #define LOAD() \
          do { \
              put = strm->next_out; \
              left = strm->avail_out; \
              next = strm->next_in; \
              have = strm->avail_in; \
              hold = state->hold; \
              bits = state->bits; \
          } while (0)
      
      /* Restore state from registers in inflate() */
      #define RESTORE() \
          do { \
              strm->next_out = put; \
              strm->avail_out = left; \
              strm->next_in = next; \
              strm->avail_in = have; \
              state->hold = hold; \
              state->bits = bits; \
          } while (0)
      
      /* Clear the input bit accumulator */
      #define INITBITS() \
          do { \
              hold = 0; \
              bits = 0; \
          } while (0)
      
      /* Get a byte of input into the bit accumulator, or return from inflate()
         if there is no input available. */
      #define PULLBYTE() \
          do { \
              if (have == 0) goto inf_leave; \
              have--; \
              hold += (unsigned long)(*next++) << bits; \
              bits += 8; \
          } while (0)
      
      /* Assure that there are at least n bits in the bit accumulator.  If there is
         not enough available input to do that, then return from inflate(). */
      #define NEEDBITS(n) \
          do { \
              while (bits < (unsigned)(n)) \
                  PULLBYTE(); \
          } while (0)
      
      /* Return the low n bits of the bit accumulator (n < 16) */
      #define BITS(n) \
          ((unsigned)hold & ((1U << (n)) - 1))
      
      /* Remove n bits from the bit accumulator */
      #define DROPBITS(n) \
          do { \
              hold >>= (n); \
              bits -= (unsigned)(n); \
          } while (0)
      
      /* Remove zero to seven bits as needed to go to a byte boundary */
      #define BYTEBITS() \
          do { \
              hold >>= bits & 7; \
              bits -= bits & 7; \
          } while (0)
      
      /* Reverse the bytes in a 32-bit value */
      #define REVERSE(q) \
          ((((q) >> 24) & 0xff) + (((q) >> 8) & 0xff00) + \
           (((q) & 0xff00) << 8) + (((q) & 0xff) << 24))
      
      /*
         inflate() uses a state machine to process as much input data and generate as
         much output data as possible before returning.  The state machine is
         structured roughly as follows:
      
          for (;;) switch (state) {
          ...
          case STATEn:
              if (not enough input data or output space to make progress)
                  return;
              ... make progress ...
              state = STATEm;
              break;
          ...
          }
      
         so when inflate() is called again, the same case is attempted again, and
         if the appropriate resources are provided, the machine proceeds to the
         next state.  The NEEDBITS() macro is usually the way the state evaluates
         whether it can proceed or should return.  NEEDBITS() does the return if
         the requested bits are not available.  The typical use of the BITS macros
         is:
      
              NEEDBITS(n);
              ... do something with BITS(n) ...
              DROPBITS(n);
      
         where NEEDBITS(n) either returns from inflate() if there isn't enough
         input left to load n bits into the accumulator, or it continues.  BITS(n)
         gives the low n bits in the accumulator.  When done, DROPBITS(n) drops
         the low n bits off the accumulator.  INITBITS() clears the accumulator
         and sets the number of available bits to zero.  BYTEBITS() discards just
         enough bits to put the accumulator on a byte boundary.  After BYTEBITS()
         and a NEEDBITS(8), then BITS(8) would return the next byte in the stream.
      
         NEEDBITS(n) uses PULLBYTE() to get an available byte of input, or to return
         if there is no input available.  The decoding of variable length codes uses
         PULLBYTE() directly in order to pull just enough bytes to decode the next
         code, and no more.
      
         Some states loop until they get enough input, making sure that enough
         state information is maintained to continue the loop where it left off
         if NEEDBITS() returns in the loop.  For example, want, need, and keep
         would all have to actually be part of the saved state in case NEEDBITS()
         returns:
      
          case STATEw:
              while (want < need) {
                  NEEDBITS(n);
                  keep[want++] = BITS(n);
                  DROPBITS(n);
              }
              state = STATEx;
          case STATEx:
      
         As shown above, if the next state is also the next case, then the break
         is omitted.
      
         A state may also return if there is not enough output space available to
         complete that state.  Those states are copying stored data, writing a
         literal byte, and copying a matching string.
      
         When returning, a "goto inf_leave" is used to update the total counters,
         update the check value, and determine whether any progress has been made
         during that inflate() call in order to return the proper return code.
         Progress is defined as a change in either strm->avail_in or strm->avail_out.
         When there is a window, goto inf_leave will update the window with the last
         output written.  If a goto inf_leave occurs in the middle of decompression
         and there is no window currently, goto inf_leave will create one and copy
         output to the window for the next call of inflate().
      
         In this implementation, the flush parameter of inflate() only affects the
         return code (per zlib.h).  inflate() always writes as much as possible to
         strm->next_out, given the space available and the provided input--the effect
         documented in zlib.h of Z_SYNC_FLUSH.  Furthermore, inflate() always defers
         the allocation of and copying into a sliding window until necessary, which
         provides the effect documented in zlib.h for Z_FINISH when the entire input
   stream is available.  So the only thing the flush parameter actually does is:
         when flush is set to Z_FINISH, inflate() cannot return Z_OK.  Instead it
         will return Z_BUF_ERROR if it has not reached the end of the stream.
       */
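
/*
 * The bit-accumulator discipline described above, in isolation
 * ('example_getbits' is local to this illustration): DEFLATE packs
 * codes LSB-first, so PULLBYTE() shifts each new byte in above the
 * bits already held, BITS(n) reads from the bottom, and DROPBITS(n)
 * discards what was consumed.
 */
#if 0 /* illustrative sketch */
static unsigned example_getbits(const unsigned char **next, unsigned *have,
                                unsigned long *hold, unsigned *bits,
                                unsigned n)
{
    unsigned val;

    while (*bits < n) {                       /* NEEDBITS(n) */
        if (*have == 0)
            return ~0U;                       /* caller must supply input */
        (*have)--;
        *hold += (unsigned long)(*(*next)++) << *bits;  /* PULLBYTE() */
        *bits += 8;
    }
    val = (unsigned)*hold & ((1U << n) - 1);  /* BITS(n) */
    *hold >>= n;                              /* DROPBITS(n) */
    *bits -= n;
    return val;
}
#endif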
      
      int zlib_inflate(z_streamp strm, int flush)
      {
          struct inflate_state *state;
          const unsigned char *next;  /* next input */
          unsigned char *put;         /* next output */
          unsigned have, left;        /* available input and output */
          unsigned long hold;         /* bit buffer */
          unsigned bits;              /* bits in bit buffer */
          unsigned in, out;           /* save starting available input and output */
          unsigned copy;              /* number of stored or match bytes to copy */
          unsigned char *from;        /* where to copy match bytes from */
          code this;                  /* current decoding table entry */
          code last;                  /* parent table entry */
          unsigned len;               /* length to copy for repeats, bits to drop */
          int ret;                    /* return code */
          static const unsigned short order[19] = /* permutation of code lengths */
              {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
      
          /* Do not check for strm->next_out == NULL here as ppc zImage
             inflates to strm->next_out = 0 */
      
          if (strm == NULL || strm->state == NULL ||
              (strm->next_in == NULL && strm->avail_in != 0))
              return Z_STREAM_ERROR;
      
          state = (struct inflate_state *)strm->state;
      
          if (state->mode == TYPE) state->mode = TYPEDO;      /* skip check */
          LOAD();
          in = have;
          out = left;
          ret = Z_OK;
          for (;;)
              switch (state->mode) {
              case HEAD:
                  if (state->wrap == 0) {
                      state->mode = TYPEDO;
                      break;
                  }
                  NEEDBITS(16);
            if (((BITS(8) << 8) + (hold >> 8)) % 31) {
                      strm->msg = (char *)"incorrect header check";
                      state->mode = BAD;
                      break;
                  }
                  if (BITS(4) != Z_DEFLATED) {
                      strm->msg = (char *)"unknown compression method";
                      state->mode = BAD;
                      break;
                  }
                  DROPBITS(4);
                  len = BITS(4) + 8;
                  if (len > state->wbits) {
                      strm->msg = (char *)"invalid window size";
                      state->mode = BAD;
                      break;
                  }
                  state->dmax = 1U << len;
                  strm->adler = state->check = zlib_adler32(0L, NULL, 0);
                  state->mode = hold & 0x200 ? DICTID : TYPE;
                  INITBITS();
                  break;
              case DICTID:
                  NEEDBITS(32);
                  strm->adler = state->check = REVERSE(hold);
                  INITBITS();
                  state->mode = DICT;
              case DICT:
                  if (state->havedict == 0) {
                      RESTORE();
                      return Z_NEED_DICT;
                  }
                  strm->adler = state->check = zlib_adler32(0L, NULL, 0);
                  state->mode = TYPE;
              case TYPE:
                  if (flush == Z_BLOCK) goto inf_leave;
              case TYPEDO:
                  if (state->last) {
                      BYTEBITS();
                      state->mode = CHECK;
                      break;
                  }
                  NEEDBITS(3);
                  state->last = BITS(1);
                  DROPBITS(1);
                  switch (BITS(2)) {
                  case 0:                             /* stored block */
                      state->mode = STORED;
                      break;
                  case 1:                             /* fixed block */
                      zlib_fixedtables(state);
                      state->mode = LEN;              /* decode codes */
                      break;
                  case 2:                             /* dynamic block */
                      state->mode = TABLE;
                      break;
                  case 3:
                      strm->msg = (char *)"invalid block type";
                      state->mode = BAD;
                  }
                  DROPBITS(2);
                  break;
              case STORED:
                  BYTEBITS();                         /* go to byte boundary */
                  NEEDBITS(32);
                  if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) {
                      strm->msg = (char *)"invalid stored block lengths";
                      state->mode = BAD;
                      break;
                  }
                  state->length = (unsigned)hold & 0xffff;
                  INITBITS();
                  state->mode = COPY;
              case COPY:
                  copy = state->length;
                  if (copy) {
                      if (copy > have) copy = have;
                      if (copy > left) copy = left;
                      if (copy == 0) goto inf_leave;
                      memcpy(put, next, copy);
                      have -= copy;
                      next += copy;
                      left -= copy;
                      put += copy;
                      state->length -= copy;
                      break;
                  }
                  state->mode = TYPE;
                  break;
              case TABLE:
                  NEEDBITS(14);
                  state->nlen = BITS(5) + 257;
                  DROPBITS(5);
                  state->ndist = BITS(5) + 1;
                  DROPBITS(5);
                  state->ncode = BITS(4) + 4;
                  DROPBITS(4);
      #ifndef PKZIP_BUG_WORKAROUND
                  if (state->nlen > 286 || state->ndist > 30) {
                      strm->msg = (char *)"too many length or distance symbols";
                      state->mode = BAD;
                      break;
                  }
      #endif
                  state->have = 0;
                  state->mode = LENLENS;
              case LENLENS:
                  while (state->have < state->ncode) {
                      NEEDBITS(3);
                      state->lens[order[state->have++]] = (unsigned short)BITS(3);
                      DROPBITS(3);
                  }
                  while (state->have < 19)
                      state->lens[order[state->have++]] = 0;
                  state->next = state->codes;
                  state->lencode = (code const *)(state->next);
                  state->lenbits = 7;
                  ret = zlib_inflate_table(CODES, state->lens, 19, &(state->next),
                                      &(state->lenbits), state->work);
                  if (ret) {
                      strm->msg = (char *)"invalid code lengths set";
                      state->mode = BAD;
                      break;
                  }
                  state->have = 0;
                  state->mode = CODELENS;
              case CODELENS:
                  while (state->have < state->nlen + state->ndist) {
                      for (;;) {
                          this = state->lencode[BITS(state->lenbits)];
                          if ((unsigned)(this.bits) <= bits) break;
                          PULLBYTE();
                      }
                      if (this.val < 16) {
                          NEEDBITS(this.bits);
                          DROPBITS(this.bits);
                          state->lens[state->have++] = this.val;
                      }
                      else {
                          if (this.val == 16) {
                              NEEDBITS(this.bits + 2);
                              DROPBITS(this.bits);
                              if (state->have == 0) {
                                  strm->msg = (char *)"invalid bit length repeat";
                                  state->mode = BAD;
                                  break;
                              }
                              len = state->lens[state->have - 1];
                              copy = 3 + BITS(2);
                              DROPBITS(2);
                          }
                          else if (this.val == 17) {
                              NEEDBITS(this.bits + 3);
                              DROPBITS(this.bits);
                              len = 0;
                              copy = 3 + BITS(3);
                              DROPBITS(3);
                          }
                          else {
                              NEEDBITS(this.bits + 7);
                              DROPBITS(this.bits);
                              len = 0;
                              copy = 11 + BITS(7);
                              DROPBITS(7);
                          }
                          if (state->have + copy > state->nlen + state->ndist) {
                              strm->msg = (char *)"invalid bit length repeat";
                              state->mode = BAD;
                              break;
                          }
                          while (copy--)
                              state->lens[state->have++] = (unsigned short)len;
                      }
                  }
      
                  /* handle error breaks in while */
                  if (state->mode == BAD) break;
      
                  /* build code tables */
                  state->next = state->codes;
                  state->lencode = (code const *)(state->next);
                  state->lenbits = 9;
                  ret = zlib_inflate_table(LENS, state->lens, state->nlen, &(state->next),
                                      &(state->lenbits), state->work);
                  if (ret) {
                      strm->msg = (char *)"invalid literal/lengths set";
                      state->mode = BAD;
                      break;
                  }
                  state->distcode = (code const *)(state->next);
                  state->distbits = 6;
                  ret = zlib_inflate_table(DISTS, state->lens + state->nlen, state->ndist,
                                  &(state->next), &(state->distbits), state->work);
                  if (ret) {
                      strm->msg = (char *)"invalid distances set";
                      state->mode = BAD;
                      break;
                  }
                  state->mode = LEN;
              case LEN:
                  if (have >= 6 && left >= 258) {
                      RESTORE();
                      inflate_fast(strm, out);
                      LOAD();
                      break;
                  }
                  for (;;) {
                      this = state->lencode[BITS(state->lenbits)];
                      if ((unsigned)(this.bits) <= bits) break;
                      PULLBYTE();
                  }
                  if (this.op && (this.op & 0xf0) == 0) {
                      last = this;
                      for (;;) {
                          this = state->lencode[last.val +
                                  (BITS(last.bits + last.op) >> last.bits)];
                          if ((unsigned)(last.bits + this.bits) <= bits) break;
                          PULLBYTE();
                      }
                      DROPBITS(last.bits);
                  }
                  DROPBITS(this.bits);
                  state->length = (unsigned)this.val;
                  if ((int)(this.op) == 0) {
                      state->mode = LIT;
                      break;
                  }
                  if (this.op & 32) {
                      state->mode = TYPE;
                      break;
                  }
                  if (this.op & 64) {
                      strm->msg = (char *)"invalid literal/length code";
                      state->mode = BAD;
                      break;
                  }
                  state->extra = (unsigned)(this.op) & 15;
                  state->mode = LENEXT;
              case LENEXT:
                  if (state->extra) {
                      NEEDBITS(state->extra);
                      state->length += BITS(state->extra);
                      DROPBITS(state->extra);
                  }
                  state->mode = DIST;
              case DIST:
                  for (;;) {
                      this = state->distcode[BITS(state->distbits)];
                      if ((unsigned)(this.bits) <= bits) break;
                      PULLBYTE();
                  }
                  if ((this.op & 0xf0) == 0) {
                      last = this;
                      for (;;) {
                          this = state->distcode[last.val +
                                  (BITS(last.bits + last.op) >> last.bits)];
                          if ((unsigned)(last.bits + this.bits) <= bits) break;
                          PULLBYTE();
                      }
                      DROPBITS(last.bits);
                  }
                  DROPBITS(this.bits);
                  if (this.op & 64) {
                      strm->msg = (char *)"invalid distance code";
                      state->mode = BAD;
                      break;
                  }
                  state->offset = (unsigned)this.val;
                  state->extra = (unsigned)(this.op) & 15;
                  state->mode = DISTEXT;
              case DISTEXT:
                  if (state->extra) {
                      NEEDBITS(state->extra);
                      state->offset += BITS(state->extra);
                      DROPBITS(state->extra);
                  }
      #ifdef INFLATE_STRICT
                  if (state->offset > state->dmax) {
                      strm->msg = (char *)"invalid distance too far back";
                      state->mode = BAD;
                      break;
                  }
      #endif
                  if (state->offset > state->whave + out - left) {
                      strm->msg = (char *)"invalid distance too far back";
                      state->mode = BAD;
                      break;
                  }
                  state->mode = MATCH;
              case MATCH:
                  if (left == 0) goto inf_leave;
                  copy = out - left;
                  if (state->offset > copy) {         /* copy from window */
                      copy = state->offset - copy;
                      if (copy > state->write) {
                          copy -= state->write;
                          from = state->window + (state->wsize - copy);
                      }
                      else
                          from = state->window + (state->write - copy);
                      if (copy > state->length) copy = state->length;
                  }
                  else {                              /* copy from output */
                      from = put - state->offset;
                      copy = state->length;
                  }
                  if (copy > left) copy = left;
                  left -= copy;
                  state->length -= copy;
                  do {
                      *put++ = *from++;
                  } while (--copy);
                  if (state->length == 0) state->mode = LEN;
                  break;
              case LIT:
                  if (left == 0) goto inf_leave;
                  *put++ = (unsigned char)(state->length);
                  left--;
                  state->mode = LEN;
                  break;
              case CHECK:
                  if (state->wrap) {
                      NEEDBITS(32);
                      out -= left;
                      strm->total_out += out;
                      state->total += out;
                      if (out)
                          strm->adler = state->check =
                              UPDATE(state->check, put - out, out);
                      out = left;
                if (REVERSE(hold) != state->check) {
                          strm->msg = (char *)"incorrect data check";
                          state->mode = BAD;
                          break;
                      }
                      INITBITS();
                  }
                  state->mode = DONE;
              case DONE:
                  ret = Z_STREAM_END;
                  goto inf_leave;
              case BAD:
                  ret = Z_DATA_ERROR;
                  goto inf_leave;
              case MEM:
                  return Z_MEM_ERROR;
              case SYNC:
              default:
                  return Z_STREAM_ERROR;
              }
      
          /*
             Return from inflate(), updating the total counts and the check value.
             If there was no progress during the inflate() call, return a buffer
             error.  Call zlib_updatewindow() to create and/or update the window state.
           */
        inf_leave:
          RESTORE();
          if (state->wsize || (state->mode < CHECK && out != strm->avail_out))
              zlib_updatewindow(strm, out);
      
          in -= strm->avail_in;
          out -= strm->avail_out;
          strm->total_in += in;
          strm->total_out += out;
          state->total += out;
          if (state->wrap && out)
              strm->adler = state->check =
                  UPDATE(state->check, strm->next_out - out, out);
      
          strm->data_type = state->bits + (state->last ? 64 : 0) +
                            (state->mode == TYPE ? 128 : 0);
      
          if (flush == Z_PACKET_FLUSH && ret == Z_OK &&
                  strm->avail_out != 0 && strm->avail_in == 0)
                      return zlib_inflateSyncPacket(strm);
      
          if (((in == 0 && out == 0) || flush == Z_FINISH) && ret == Z_OK)
              ret = Z_BUF_ERROR;
      
          return ret;
      }
      
      int zlib_inflateEnd(z_streamp strm)
      {
          if (strm == NULL || strm->state == NULL)
              return Z_STREAM_ERROR;
          return Z_OK;
      }
      
      /*
       * This subroutine adds the data at next_in/avail_in to the output history
       * without performing any output.  The output buffer must be "caught up";
 * i.e. there must be no pending output (this should always be the case). The state must
       * be waiting on the start of a block (i.e. mode == TYPE or HEAD).  On exit,
       * the output will also be caught up, and the checksum will have been updated
       * if need be.
       */
      int zlib_inflateIncomp(z_stream *z)
      {
          struct inflate_state *state = (struct inflate_state *)z->state;
          Byte *saved_no = z->next_out;
          uInt saved_ao = z->avail_out;
      
          if (state->mode != TYPE && state->mode != HEAD)
              return Z_DATA_ERROR;
      
    /* Set up some variables to allow misuse of zlib_updatewindow() */
          z->avail_out = 0;
          z->next_out = (unsigned char*)z->next_in + z->avail_in;
      
          zlib_updatewindow(z, z->avail_in);
      
          /* Restore saved variables */
          z->avail_out = saved_ao;
          z->next_out = saved_no;
      
          z->adler = state->check =
              UPDATE(state->check, z->next_in, z->avail_in);
      
          z->total_out += z->avail_in;
          z->total_in += z->avail_in;
          z->next_in += z->avail_in;
          state->total += z->avail_in;
          z->avail_in = 0;
      
          return Z_OK;
      }
      #undef TRACE_SYSTEM
      #define TRACE_SYSTEM fib
      
      #if !defined(_TRACE_FIB_H) || defined(TRACE_HEADER_MULTI_READ)
      #define _TRACE_FIB_H
      
      #include <linux/skbuff.h>
      #include <linux/netdevice.h>
      #include <net/ip_fib.h>
      #include <linux/tracepoint.h>
      
  726 TRACE_EVENT(fib_table_lookup,
      
              TP_PROTO(u32 tb_id, const struct flowi4 *flp),
      
              TP_ARGS(tb_id, flp),
      
              TP_STRUCT__entry(
                      __field(        u32,        tb_id                )
                      __field(        int,        oif                )
                      __field(        int,        iif                )
                      __field(        __u8,        tos                )
                      __field(        __u8,        scope                )
                      __field(        __u8,        flags                )
                      __array(        __u8,        src,        4        )
                      __array(        __u8,        dst,        4        )
              ),
      
              TP_fast_assign(
                      __be32 *p32;
      
                      __entry->tb_id = tb_id;
                      __entry->oif = flp->flowi4_oif;
                      __entry->iif = flp->flowi4_iif;
                      __entry->tos = flp->flowi4_tos;
                      __entry->scope = flp->flowi4_scope;
                      __entry->flags = flp->flowi4_flags;
      
                      p32 = (__be32 *) __entry->src;
                      *p32 = flp->saddr;
      
                      p32 = (__be32 *) __entry->dst;
                      *p32 = flp->daddr;
              ),
      
              TP_printk("table %u oif %d iif %d src %pI4 dst %pI4 tos %d scope %d flags %x",
                        __entry->tb_id, __entry->oif, __entry->iif,
                        __entry->src, __entry->dst, __entry->tos, __entry->scope,
                        __entry->flags)
      );
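
/*
 * Consuming this event from userspace (a sketch; the tracefs mount
 * point varies, commonly /sys/kernel/tracing or
 * /sys/kernel/debug/tracing):
 *
 *   # echo 1 > /sys/kernel/tracing/events/fib/fib_table_lookup/enable
 *   # cat /sys/kernel/tracing/trace_pipe
 *
 * Each lookup then prints one line via the TP_printk() format above,
 * with src/dst rendered by the %pI4 specifier.
 */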
      
  550 TRACE_EVENT(fib_table_lookup_nh,
      
              TP_PROTO(const struct fib_nh *nh),
      
              TP_ARGS(nh),
      
              TP_STRUCT__entry(
                      __string(        name,        nh->nh_dev->name)
                      __field(        int,        oif                )
                      __array(        __u8,        src,        4        )
              ),
      
              TP_fast_assign(
                      __be32 *p32 = (__be32 *) __entry->src;
      
                      __assign_str(name, nh->nh_dev ? nh->nh_dev->name : "not set");
                      __entry->oif = nh->nh_oif;
                      *p32 = nh->nh_saddr;
              ),
      
              TP_printk("nexthop dev %s oif %d src %pI4",
                        __get_str(name), __entry->oif, __entry->src)
      );
      
    5 TRACE_EVENT(fib_validate_source,
      
              TP_PROTO(const struct net_device *dev, const struct flowi4 *flp),
      
              TP_ARGS(dev, flp),
      
              TP_STRUCT__entry(
                      __string(        name,        dev->name        )
                      __field(        int,        oif                )
                      __field(        int,        iif                )
                      __field(        __u8,        tos                )
                      __array(        __u8,        src,        4        )
                      __array(        __u8,        dst,        4        )
              ),
      
              TP_fast_assign(
                      __be32 *p32;
      
                      __assign_str(name, dev ? dev->name : "not set");
                      __entry->oif = flp->flowi4_oif;
                      __entry->iif = flp->flowi4_iif;
                      __entry->tos = flp->flowi4_tos;
      
                      p32 = (__be32 *) __entry->src;
                      *p32 = flp->saddr;
      
                      p32 = (__be32 *) __entry->dst;
                      *p32 = flp->daddr;
              ),
      
              TP_printk("dev %s oif %d iif %d tos %d src %pI4 dst %pI4",
                        __get_str(name), __entry->oif, __entry->iif, __entry->tos,
                        __entry->src, __entry->dst)
      );
      #endif /* _TRACE_FIB_H */
      
      /* This part must be outside protection */
      #include <trace/define_trace.h>
      /*
       *  Copyright (C) 1991, 1992  Linus Torvalds
       */
      
      /*
       * 'tty_io.c' gives an orthogonal feeling to tty's, be they consoles
       * or rs-channels. It also implements echoing, cooked mode etc.
       *
       * Kill-line thanks to John T Kohl, who also corrected VMIN = VTIME = 0.
       *
       * Modified by Theodore Ts'o, 9/14/92, to dynamically allocate the
       * tty_struct and tty_queue structures.  Previously there was an array
       * of 256 tty_struct's which was statically allocated, and the
       * tty_queue structures were allocated at boot time.  Both are now
       * dynamically allocated only when the tty is open.
       *
       * Also restructured routines so that there is more of a separation
       * between the high-level tty routines (tty_io.c and tty_ioctl.c) and
       * the low-level tty routines (serial.c, pty.c, console.c).  This
       * makes for cleaner and more compact code.  -TYT, 9/17/92
       *
       * Modified by Fred N. van Kempen, 01/29/93, to add line disciplines
       * which can be dynamically activated and de-activated by the line
       * discipline handling modules (like SLIP).
       *
       * NOTE: pay no attention to the line discipline code (yet); its
       * interface is still subject to change in this version...
       * -- TYT, 1/31/92
       *
       * Added functionality to the OPOST tty handling.  No delays, but all
       * other bits should be there.
       *        -- Nick Holloway <alfie@dcs.warwick.ac.uk>, 27th May 1993.
       *
       * Rewrote canonical mode and added more termios flags.
       *         -- julian@uhunix.uhcc.hawaii.edu (J. Cowley), 13Jan94
       *
       * Reorganized FASYNC support so mouse code can share it.
       *        -- ctm@ardi.com, 9Sep95
       *
       * New TIOCLINUX variants added.
       *        -- mj@k332.feld.cvut.cz, 19-Nov-95
       *
       * Restrict vt switching via ioctl()
       *      -- grif@cs.ucr.edu, 5-Dec-95
       *
       * Move console and virtual terminal code to more appropriate files,
       * implement CONFIG_VT and generalize console device interface.
       *        -- Marko Kohtala <Marko.Kohtala@hut.fi>, March 97
       *
       * Rewrote tty_init_dev and tty_release_dev to eliminate races.
       *        -- Bill Hawes <whawes@star.net>, June 97
       *
       * Added devfs support.
       *      -- C. Scott Ananian <cananian@alumni.princeton.edu>, 13-Jan-1998
       *
       * Added support for a Unix98-style ptmx device.
       *      -- C. Scott Ananian <cananian@alumni.princeton.edu>, 14-Jan-1998
       *
       * Reduced memory usage for older ARM systems
       *      -- Russell King <rmk@arm.linux.org.uk>
       *
       * Move do_SAK() into process context.  Less stack use in devfs functions.
       * alloc_tty_struct() always uses kmalloc()
 *                         -- Andrew Morton <andrewm@uow.edu.au> 17Mar01
       */
      
      #include <linux/types.h>
      #include <linux/major.h>
      #include <linux/errno.h>
      #include <linux/signal.h>
      #include <linux/fcntl.h>
      #include <linux/sched.h>
      #include <linux/interrupt.h>
      #include <linux/tty.h>
      #include <linux/tty_driver.h>
      #include <linux/tty_flip.h>
      #include <linux/devpts_fs.h>
      #include <linux/file.h>
      #include <linux/fdtable.h>
      #include <linux/console.h>
      #include <linux/timer.h>
      #include <linux/ctype.h>
      #include <linux/kd.h>
      #include <linux/mm.h>
      #include <linux/string.h>
      #include <linux/slab.h>
      #include <linux/poll.h>
      #include <linux/proc_fs.h>
      #include <linux/init.h>
      #include <linux/module.h>
      #include <linux/device.h>
      #include <linux/wait.h>
      #include <linux/bitops.h>
      #include <linux/delay.h>
      #include <linux/seq_file.h>
      #include <linux/serial.h>
      #include <linux/ratelimit.h>
      
      #include <linux/uaccess.h>
      
      #include <linux/kbd_kern.h>
      #include <linux/vt_kern.h>
      #include <linux/selection.h>
      
      #include <linux/kmod.h>
      #include <linux/nsproxy.h>
      
      #undef TTY_DEBUG_HANGUP
      #ifdef TTY_DEBUG_HANGUP
      # define tty_debug_hangup(tty, f, args...)        tty_debug(tty, f, ##args)
      #else
      # define tty_debug_hangup(tty, f, args...)        do { } while (0)
      #endif
      
      #define TTY_PARANOIA_CHECK 1
      #define CHECK_TTY_COUNT 1
      
      struct ktermios tty_std_termios = {        /* for the benefit of tty drivers  */
              .c_iflag = ICRNL | IXON,
              .c_oflag = OPOST | ONLCR,
              .c_cflag = B38400 | CS8 | CREAD | HUPCL,
              .c_lflag = ISIG | ICANON | ECHO | ECHOE | ECHOK |
                         ECHOCTL | ECHOKE | IEXTEN,
              .c_cc = INIT_C_CC,
              .c_ispeed = 38400,
              .c_ospeed = 38400
      };
      
      EXPORT_SYMBOL(tty_std_termios);
      
      /* This list gets poked at by procfs and various bits of boot up code. This
         could do with some rationalisation such as pulling the tty proc function
         into this file */
      
      LIST_HEAD(tty_drivers);                        /* linked list of tty drivers */
      
      /* Mutex to protect creating and releasing a tty. This is shared with
         vt.c for deeply disgusting hack reasons */
      DEFINE_MUTEX(tty_mutex);
      EXPORT_SYMBOL(tty_mutex);
      
      /* Spinlock to protect the tty->tty_files list */
      DEFINE_SPINLOCK(tty_files_lock);
      
      static ssize_t tty_read(struct file *, char __user *, size_t, loff_t *);
      static ssize_t tty_write(struct file *, const char __user *, size_t, loff_t *);
      ssize_t redirected_tty_write(struct file *, const char __user *,
                                                              size_t, loff_t *);
      static unsigned int tty_poll(struct file *, poll_table *);
      static int tty_open(struct inode *, struct file *);
      long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
      #ifdef CONFIG_COMPAT
      static long tty_compat_ioctl(struct file *file, unsigned int cmd,
                                      unsigned long arg);
      #else
      #define tty_compat_ioctl NULL
      #endif
      static int __tty_fasync(int fd, struct file *filp, int on);
      static int tty_fasync(int fd, struct file *filp, int on);
      static void release_tty(struct tty_struct *tty, int idx);
      
      /**
       *        free_tty_struct                -        free a disused tty
       *        @tty: tty struct to free
       *
       *        Free the write buffers, tty queue and tty memory itself.
       *
       *        Locking: none. Must be called after tty is definitely unused
       */
      
      void free_tty_struct(struct tty_struct *tty)
      {
              if (!tty)
                      return;
              put_device(tty->dev);
              kfree(tty->write_buf);
              tty->magic = 0xDEADDEAD;
              kfree(tty);
      }
      
      static inline struct tty_struct *file_tty(struct file *file)
      {
  369         return ((struct tty_file_private *)file->private_data)->tty;
      }
      
      int tty_alloc_file(struct file *file)
      {
              struct tty_file_private *priv;
      
  196         priv = kmalloc(sizeof(*priv), GFP_KERNEL);
              if (!priv)
                      return -ENOMEM;
      
  196         file->private_data = priv;
      
  155         return 0;
      }
      
      /* Associate a new file with the tty structure */
      void tty_add_file(struct tty_struct *tty, struct file *file)
      {
  183         struct tty_file_private *priv = file->private_data;
      
              priv->tty = tty;
              priv->file = file;
      
              spin_lock(&tty_files_lock);
  183         list_add(&priv->list, &tty->tty_files);
  183         spin_unlock(&tty_files_lock);
      }
      
      /**
       * tty_free_file - free file->private_data
       *
       * This shall be used only for fail path handling when tty_add_file was not
       * called yet.
       */
      void tty_free_file(struct file *file)
      {
              struct tty_file_private *priv = file->private_data;
      
   14         file->private_data = NULL;
              kfree(priv);
      }
      
      /* Delete file from its tty */
      static void tty_del_file(struct file *file)
      {
   50         struct tty_file_private *priv = file->private_data;
      
              spin_lock(&tty_files_lock);
   50         list_del(&priv->list);
              spin_unlock(&tty_files_lock);
              tty_free_file(file);
      }
      
      
      #define TTY_NUMBER(tty) ((tty)->index + (tty)->driver->name_base)
      
      /**
       *        tty_name        -        return tty naming
       *        @tty: tty structure
       *
 *        Convert a tty structure into a name. The name reflects the kernel
 *        naming policy and, if udev is in use, may not match the name
 *        visible in user space.
       *
       *        Locking: none
       */
      
      const char *tty_name(const struct tty_struct *tty)
      {
              if (!tty) /* Hmm.  NULL pointer.  That's fun. */
                      return "NULL tty";
              return tty->name;
      }
      
      EXPORT_SYMBOL(tty_name);
      
      int tty_paranoia_check(struct tty_struct *tty, struct inode *inode,
                                    const char *routine)
      {
      #ifdef TTY_PARANOIA_CHECK
  393         if (!tty) {
                      printk(KERN_WARNING
                              "null TTY for (%d:%d) in %s\n",
                              imajor(inode), iminor(inode), routine);
                      return 1;
              }
  393         if (tty->magic != TTY_MAGIC) {
                      printk(KERN_WARNING
                              "bad magic number for tty struct (%d:%d) in %s\n",
                              imajor(inode), iminor(inode), routine);
                      return 1;
              }
      #endif
  393         return 0;
      }
      
      /* Caller must hold tty_lock */
      static int check_tty_count(struct tty_struct *tty, const char *routine)
      {
      #ifdef CHECK_TTY_COUNT
              struct list_head *p;
              int count = 0;
      
  101         spin_lock(&tty_files_lock);
              list_for_each(p, &tty->tty_files) {
  101                 count++;
              }
  101         spin_unlock(&tty_files_lock);
              if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
                  tty->driver->subtype == PTY_TYPE_SLAVE &&
  100             tty->link && tty->link->count)
   99                 count++;
  101         if (tty->count != count) {
                      printk(KERN_WARNING "Warning: dev (%s) tty->count(%d) "
                                          "!= #fd's(%d) in %s\n",
                             tty->name, tty->count, count, routine);
                      return count;
              }
      #endif
  101         return 0;
      }
      
      /**
       *        get_tty_driver                -        find device of a tty
 *        @device: device identifier
       *        @index: returns the index of the tty
       *
       *        This routine returns a tty driver structure, given a device number
       *        and also passes back the index number.
       *
       *        Locking: caller must hold tty_mutex
       */
      
      static struct tty_driver *get_tty_driver(dev_t device, int *index)
      {
              struct tty_driver *p;
      
   51         list_for_each_entry(p, &tty_drivers, tty_drivers) {
   51                 dev_t base = MKDEV(p->major, p->minor_start);
   51                 if (device < base || device >= base + p->num)
                              continue;
   51                 *index = device - base;
                      return tty_driver_kref_get(p);
              }
              return NULL;
      }
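/*
 * Example (illustrative): for a driver registered with major 4,
 * minor_start 64 and num 4 (the classic ttyS range), MKDEV(4, 66)
 * falls inside [base, base + num) and yields *index = 2, i.e. ttyS2.
 * The caller owns a driver kref on success and must drop it with
 * tty_driver_kref_put() when finished.
 */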
      
      #ifdef CONFIG_CONSOLE_POLL
      
      /**
       *        tty_find_polling_driver        -        find device of a polled tty
       *        @name: name string to match
       *        @line: pointer to resulting tty line nr
       *
       *        This routine returns a tty driver structure, given a name
       *        and the condition that the tty driver is capable of polled
       *        operation.
       */
      struct tty_driver *tty_find_polling_driver(char *name, int *line)
      {
              struct tty_driver *p, *res = NULL;
              int tty_line = 0;
              int len;
              char *str, *stp;
      
              for (str = name; *str; str++)
                      if ((*str >= '0' && *str <= '9') || *str == ',')
                              break;
              if (!*str)
                      return NULL;
      
              len = str - name;
              tty_line = simple_strtoul(str, &str, 10);
      
              mutex_lock(&tty_mutex);
              /* Search through the tty devices to look for a match */
              list_for_each_entry(p, &tty_drivers, tty_drivers) {
                      if (!len || strncmp(name, p->name, len) != 0)
                              continue;
                      stp = str;
                      if (*stp == ',')
                              stp++;
                      if (*stp == '\0')
                              stp = NULL;
      
                      if (tty_line >= 0 && tty_line < p->num && p->ops &&
                          p->ops->poll_init && !p->ops->poll_init(p, tty_line, stp)) {
                              res = tty_driver_kref_get(p);
                              *line = tty_line;
                              break;
                      }
              }
              mutex_unlock(&tty_mutex);
      
              return res;
      }
      EXPORT_SYMBOL_GPL(tty_find_polling_driver);
      #endif
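/*
 * Example (illustrative): polled-I/O users such as kgdboc pass strings
 * like "ttyS0" or "ttyS0,115200" to tty_find_polling_driver(). The
 * digit scan splits the name ("ttyS", len 4) from the line number (0),
 * and anything following a comma ("115200") is handed to the driver's
 * poll_init() method as an option string.
 */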
      
      /**
       *        tty_check_change        -        check for POSIX terminal changes
       *        @tty: tty to check
       *
       *        If we try to write to, or set the state of, a terminal and we're
       *        not in the foreground, send a SIGTTOU.  If the signal is blocked or
       *        ignored, go ahead and perform the operation.  (POSIX 7.2)
       *
       *        Locking: ctrl_lock
       */
      
      int __tty_check_change(struct tty_struct *tty, int sig)
      {
              unsigned long flags;
              struct pid *pgrp, *tty_pgrp;
              int ret = 0;
      
  170         if (current->signal->tty != tty)
                      return 0;
      
              rcu_read_lock();
              pgrp = task_pgrp(current);
      
              spin_lock_irqsave(&tty->ctrl_lock, flags);
              tty_pgrp = tty->pgrp;
              spin_unlock_irqrestore(&tty->ctrl_lock, flags);
      
        if (tty_pgrp && pgrp != tty_pgrp) {
                      if (is_ignored(sig)) {
                              if (sig == SIGTTIN)
                                      ret = -EIO;
                      } else if (is_current_pgrp_orphaned())
                              ret = -EIO;
                      else {
                              kill_pgrp(pgrp, sig, 1);
                              set_thread_flag(TIF_SIGPENDING);
                              ret = -ERESTARTSYS;
                      }
              }
              rcu_read_unlock();
      
              if (!tty_pgrp) {
                      pr_warn("%s: tty_check_change: sig=%d, tty->pgrp == NULL!\n",
                              tty_name(tty), sig);
              }
      
              return ret;
      }
      
      int tty_check_change(struct tty_struct *tty)
      {
  132         return __tty_check_change(tty, SIGTTOU);
      }
      EXPORT_SYMBOL(tty_check_change);
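/*
 * Example (illustrative): ioctl paths that change terminal state call
 * tty_check_change() first and propagate its result, e.g.
 *
 *        retval = tty_check_change(tty);
 *        if (retval)
 *                return retval;
 *
 * A background caller typically sees -ERESTARTSYS here once SIGTTOU
 * has been queued to its process group.
 */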
      
      static ssize_t hung_up_tty_read(struct file *file, char __user *buf,
                                      size_t count, loff_t *ppos)
      {
    2         return 0;
      }
      
      static ssize_t hung_up_tty_write(struct file *file, const char __user *buf,
                                       size_t count, loff_t *ppos)
      {
    2         return -EIO;
      }
      
      /* No kernel lock held - none needed ;) */
      static unsigned int hung_up_tty_poll(struct file *filp, poll_table *wait)
      {
    8         return POLLIN | POLLOUT | POLLERR | POLLHUP | POLLRDNORM | POLLWRNORM;
      }
      
      static long hung_up_tty_ioctl(struct file *file, unsigned int cmd,
                      unsigned long arg)
      {
    1         return cmd == TIOCSPGRP ? -ENOTTY : -EIO;
      }
      
      static long hung_up_tty_compat_ioctl(struct file *file,
                                           unsigned int cmd, unsigned long arg)
      {
              return cmd == TIOCSPGRP ? -ENOTTY : -EIO;
      }
      
      static const struct file_operations tty_fops = {
              .llseek                = no_llseek,
              .read                = tty_read,
              .write                = tty_write,
              .poll                = tty_poll,
              .unlocked_ioctl        = tty_ioctl,
              .compat_ioctl        = tty_compat_ioctl,
              .open                = tty_open,
              .release        = tty_release,
              .fasync                = tty_fasync,
      };
      
      static const struct file_operations console_fops = {
              .llseek                = no_llseek,
              .read                = tty_read,
              .write                = redirected_tty_write,
              .poll                = tty_poll,
              .unlocked_ioctl        = tty_ioctl,
              .compat_ioctl        = tty_compat_ioctl,
              .open                = tty_open,
              .release        = tty_release,
              .fasync                = tty_fasync,
      };
      
      static const struct file_operations hung_up_tty_fops = {
              .llseek                = no_llseek,
              .read                = hung_up_tty_read,
              .write                = hung_up_tty_write,
              .poll                = hung_up_tty_poll,
              .unlocked_ioctl        = hung_up_tty_ioctl,
              .compat_ioctl        = hung_up_tty_compat_ioctl,
              .release        = tty_release,
      };
      
      static DEFINE_SPINLOCK(redirect_lock);
      static struct file *redirect;
      
      
      void proc_clear_tty(struct task_struct *p)
      {
              unsigned long flags;
              struct tty_struct *tty;
              spin_lock_irqsave(&p->sighand->siglock, flags);
              tty = p->signal->tty;
              p->signal->tty = NULL;
              spin_unlock_irqrestore(&p->sighand->siglock, flags);
              tty_kref_put(tty);
      }
      
      extern void tty_sysctl_init(void);
      
      /**
       * proc_set_tty -  set the controlling terminal
       *
       * Only callable by the session leader and only if it does not already have
       * a controlling terminal.
       *
       * Caller must hold:  tty_lock()
       *                      a readlock on tasklist_lock
       *                      sighand lock
       */
      static void __proc_set_tty(struct tty_struct *tty)
      {
              unsigned long flags;
      
              spin_lock_irqsave(&tty->ctrl_lock, flags);
              /*
               * The session and fg pgrp references will be non-NULL if
               * tiocsctty() is stealing the controlling tty
               */
              put_pid(tty->session);
              put_pid(tty->pgrp);
              tty->pgrp = get_pid(task_pgrp(current));
              spin_unlock_irqrestore(&tty->ctrl_lock, flags);
              tty->session = get_pid(task_session(current));
              if (current->signal->tty) {
                      tty_debug(tty, "current tty %s not NULL!!\n",
                                current->signal->tty->name);
                      tty_kref_put(current->signal->tty);
              }
              put_pid(current->signal->tty_old_pgrp);
              current->signal->tty = tty_kref_get(tty);
              current->signal->tty_old_pgrp = NULL;
      }
      
      static void proc_set_tty(struct tty_struct *tty)
      {
              spin_lock_irq(&current->sighand->siglock);
              __proc_set_tty(tty);
              spin_unlock_irq(&current->sighand->siglock);
      }
      
      struct tty_struct *get_current_tty(void)
      {
              struct tty_struct *tty;
    9         unsigned long flags;
      
    9         spin_lock_irqsave(&current->sighand->siglock, flags);
              tty = tty_kref_get(current->signal->tty);
              spin_unlock_irqrestore(&current->sighand->siglock, flags);
              return tty;
      }
      EXPORT_SYMBOL_GPL(get_current_tty);
      
      static void session_clear_tty(struct pid *session)
      {
              struct task_struct *p;
              do_each_pid_task(session, PIDTYPE_SID, p) {
                      proc_clear_tty(p);
              } while_each_pid_task(session, PIDTYPE_SID, p);
      }
      
      /**
       *        tty_wakeup        -        request more data
       *        @tty: terminal
       *
       *        Internal and external helper for wakeups of tty. This function
       *        informs the line discipline if present that the driver is ready
       *        to receive more output data.
       */
      
      void tty_wakeup(struct tty_struct *tty)
      {
   71         struct tty_ldisc *ld;
      
              if (test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) {
                      ld = tty_ldisc_ref(tty);
                      if (ld) {
                              if (ld->ops->write_wakeup)
                                      ld->ops->write_wakeup(tty);
                              tty_ldisc_deref(ld);
   71                 }
              }
              wake_up_interruptible_poll(&tty->write_wait, POLLOUT);
      }
      
      EXPORT_SYMBOL_GPL(tty_wakeup);
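/*
 * Example (illustrative sketch): a line discipline that wants these
 * callbacks sets TTY_DO_WRITE_WAKEUP and provides a write_wakeup()
 * op; my_ldisc_wait below is a hypothetical wait queue:
 *
 *        static void my_ldisc_write_wakeup(struct tty_struct *tty)
 *        {
 *                clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
 *                wake_up_interruptible(&my_ldisc_wait);
 *        }
 */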
      
      /**
       *        tty_signal_session_leader        - sends SIGHUP to session leader
       *        @tty                controlling tty
       *        @exit_session        if non-zero, signal all foreground group processes
       *
       *        Send SIGHUP and SIGCONT to the session leader and its process group.
       *        Optionally, signal all processes in the foreground process group.
       *
       *        Returns the number of processes in the session with this tty
       *        as their controlling terminal. This value is used to drop
       *        tty references for those processes.
       */
      static int tty_signal_session_leader(struct tty_struct *tty, int exit_session)
      {
              struct task_struct *p;
              int refs = 0;
              struct pid *tty_pgrp = NULL;
      
              read_lock(&tasklist_lock);
              if (tty->session) {
                      do_each_pid_task(tty->session, PIDTYPE_SID, p) {
                              spin_lock_irq(&p->sighand->siglock);
                              if (p->signal->tty == tty) {
                                      p->signal->tty = NULL;
                                /* We defer the dereferences outside of
                                   the tasklist lock */
                                      refs++;
                              }
                              if (!p->signal->leader) {
                                      spin_unlock_irq(&p->sighand->siglock);
                                      continue;
                              }
                              __group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p);
                              __group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p);
                              put_pid(p->signal->tty_old_pgrp);  /* A noop */
                              spin_lock(&tty->ctrl_lock);
                              tty_pgrp = get_pid(tty->pgrp);
                              if (tty->pgrp)
                                      p->signal->tty_old_pgrp = get_pid(tty->pgrp);
                              spin_unlock(&tty->ctrl_lock);
                              spin_unlock_irq(&p->sighand->siglock);
   55                 } while_each_pid_task(tty->session, PIDTYPE_SID, p);
              }
              read_unlock(&tasklist_lock);
      
              if (tty_pgrp) {
                      if (exit_session)
                              kill_pgrp(tty_pgrp, SIGHUP, exit_session);
                      put_pid(tty_pgrp);
              }
      
              return refs;
      }
      
      /**
       *        __tty_hangup                -        actual handler for hangup events
       *        @work: tty device
       *
       *        This can be called by a "kworker" kernel thread.  That is process
       *        synchronous but doesn't hold any locks, so we need to make sure we
       *        have the appropriate locks for what we're doing.
       *
       *        The hangup event clears any pending redirections onto the hung up
       *        device. It ensures future writes will error and it does the needed
       *        line discipline hangup and signal delivery. The tty object itself
       *        remains intact.
       *
       *        Locking:
       *                BTM
       *                  redirect lock for undoing redirection
       *                  file list lock for manipulating list of ttys
       *                  tty_ldiscs_lock from called functions
       *                  termios_rwsem resetting termios data
       *                  tasklist_lock to walk task list for hangup event
       *                    ->siglock to protect ->signal/->sighand
       */
      static void __tty_hangup(struct tty_struct *tty, int exit_session)
      {
              struct file *cons_filp = NULL;
              struct file *filp, *f = NULL;
              struct tty_file_private *priv;
              int    closecount = 0, n;
   55         int refs;
      
              if (!tty)
                      return;
      
              spin_lock(&redirect_lock);
              if (redirect && file_tty(redirect) == tty) {
                      f = redirect;
   55                 redirect = NULL;
              }
              spin_unlock(&redirect_lock);
      
              tty_lock(tty);
      
              if (test_bit(TTY_HUPPED, &tty->flags)) {
                      tty_unlock(tty);
                      return;
              }
      
              /*
               * Some console devices aren't actually hung up for technical and
               * historical reasons, which can lead to indefinite interruptible
               * sleep in n_tty_read().  The following explicitly tells
         * n_tty_read() to abort readers.
               */
              set_bit(TTY_HUPPING, &tty->flags);
      
              /* inuse_filps is protected by the single tty lock,
                 this really needs to change if we want to flush the
                 workqueue with the lock held */
              check_tty_count(tty, "tty_hangup");
      
   31         spin_lock(&tty_files_lock);
        /* This breaks for file handles being sent over AF_UNIX sockets ? */
              list_for_each_entry(priv, &tty->tty_files, list) {
                      filp = priv->file;
   31                 if (filp->f_op->write == redirected_tty_write)
                              cons_filp = filp;
   31                 if (filp->f_op->write != tty_write)
                              continue;
                      closecount++;
                      __tty_fasync(-1, filp, 0);        /* can't block */
   55                 filp->f_op = &hung_up_tty_fops;
              }
   55         spin_unlock(&tty_files_lock);
      
              refs = tty_signal_session_leader(tty, exit_session);
              /* Account for the p->signal references we killed */
              while (refs--)
   55                 tty_kref_put(tty);
      
              tty_ldisc_hangup(tty);
      
              spin_lock_irq(&tty->ctrl_lock);
              clear_bit(TTY_THROTTLED, &tty->flags);
              clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
              put_pid(tty->session);
              put_pid(tty->pgrp);
              tty->session = NULL;
              tty->pgrp = NULL;
              tty->ctrl_status = 0;
              spin_unlock_irq(&tty->ctrl_lock);
      
              /*
               * If one of the devices matches a console pointer, we
               * cannot just call hangup() because that will cause
               * tty->count and state->count to go out of sync.
               * So we just call close() the right number of times.
               */
              if (cons_filp) {
                      if (tty->ops->close)
   42                         for (n = 0; n < closecount; n++)
                                      tty->ops->close(tty, cons_filp);
              } else if (tty->ops->hangup)
                      tty->ops->hangup(tty);
              /*
               * We don't want to have driver/ldisc interactions beyond
               * the ones we did here. The driver layer expects no
               * calls after ->hangup() from the ldisc side. However we
         * can't yet guarantee all that.
               */
              set_bit(TTY_HUPPED, &tty->flags);
              clear_bit(TTY_HUPPING, &tty->flags);
              tty_unlock(tty);
              if (f)
                      fput(f);
      }
      
      static void do_tty_hangup(struct work_struct *work)
      {
              struct tty_struct *tty =
                      container_of(work, struct tty_struct, hangup_work);
      
              __tty_hangup(tty, 0);
      }
      
      /**
       *        tty_hangup                -        trigger a hangup event
       *        @tty: tty to hangup
       *
 *        A carrier loss (virtual or otherwise) has occurred on this tty.
 *        Schedule a hangup sequence to run after this event.
       */
      
      void tty_hangup(struct tty_struct *tty)
      {
              tty_debug_hangup(tty, "\n");
              schedule_work(&tty->hangup_work);
      }
      
      EXPORT_SYMBOL(tty_hangup);
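/*
 * Example (illustrative): a UART driver that detects carrier loss in
 * interrupt context cannot run the hangup synchronously, so it uses
 * this asynchronous form (carrier_lost is a hypothetical condition):
 *
 *        if (carrier_lost)
 *                tty_hangup(tty);
 *
 * The scheduled work then performs __tty_hangup() in process context.
 */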
      
      /**
       *        tty_vhangup                -        process vhangup
       *        @tty: tty to hangup
       *
       *        The user has asked via system call for the terminal to be hung up.
       *        We do this synchronously so that when the syscall returns the process
       *        is complete. That guarantee is necessary for security reasons.
       */
      
      void tty_vhangup(struct tty_struct *tty)
   55 {
   42         tty_debug_hangup(tty, "\n");
              __tty_hangup(tty, 0);
      }
      
      EXPORT_SYMBOL(tty_vhangup);
      
      
      /**
       *        tty_vhangup_self        -        process vhangup for own ctty
       *
       *        Perform a vhangup on the current controlling tty
       */
      
      void tty_vhangup_self(void)
      {
              struct tty_struct *tty;
      
              tty = get_current_tty();
              if (tty) {
                      tty_vhangup(tty);
                      tty_kref_put(tty);
              }
      }
      
      /**
       *        tty_vhangup_session                -        hangup session leader exit
       *        @tty: tty to hangup
       *
       *        The session leader is exiting and hanging up its controlling terminal.
       *        Every process in the foreground process group is signalled SIGHUP.
       *
       *        We do this synchronously so that when the syscall returns the process
       *        is complete. That guarantee is necessary for security reasons.
       */
      
      static void tty_vhangup_session(struct tty_struct *tty)
      {
              tty_debug_hangup(tty, "\n");
              __tty_hangup(tty, 1);
      }
      
      /**
       *        tty_hung_up_p                -        was tty hung up
       *        @filp: file pointer of tty
       *
       *        Return true if the tty has been subject to a vhangup or a carrier
       *        loss
       */
      
  160 int tty_hung_up_p(struct file *filp)
      {
              return (filp->f_op == &hung_up_tty_fops);
      }
      
      EXPORT_SYMBOL(tty_hung_up_p);
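/*
 * Example (illustrative): line discipline read loops use this check to
 * turn a hangup into EOF instead of sleeping forever, e.g.
 *
 *        if (tty_hung_up_p(file))
 *                break;
 *
 * which is what n_tty does while waiting for input.
 */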
      
      /**
       *        disassociate_ctty        -        disconnect controlling tty
       *        @on_exit: true if exiting so need to "hang up" the session
       *
       *        This function is typically called only by the session leader, when
       *        it wants to disassociate itself from its controlling tty.
       *
       *        It performs the following functions:
       *         (1)  Sends a SIGHUP and SIGCONT to the foreground process group
 *         (2)  Clears the tty from being the controlling tty of the session
       *         (3)  Clears the controlling tty for all processes in the
       *                 session group.
       *
       *        The argument on_exit is set to 1 if called when a process is
       *        exiting; it is 0 if called by the ioctl TIOCNOTTY.
       *
       *        Locking:
       *                BTM is taken for hysterical raisins, and held when
       *                  called from no_tty().
       *                  tty_mutex is taken to protect tty
       *                  ->siglock is taken to protect ->signal/->sighand
       *                  tasklist_lock is taken to walk process list for sessions
       *                    ->siglock is taken to protect ->signal/->sighand
       */
      
      void disassociate_ctty(int on_exit)
      {
              struct tty_struct *tty;
      
              if (!current->signal->leader)
                      return;
      
              tty = get_current_tty();
              if (tty) {
                      if (on_exit && tty->driver->type != TTY_DRIVER_TYPE_PTY) {
                              tty_vhangup_session(tty);
                      } else {
                              struct pid *tty_pgrp = tty_get_pgrp(tty);
                              if (tty_pgrp) {
                                      kill_pgrp(tty_pgrp, SIGHUP, on_exit);
                                      if (!on_exit)
                                              kill_pgrp(tty_pgrp, SIGCONT, on_exit);
                                      put_pid(tty_pgrp);
                              }
                      }
                      tty_kref_put(tty);
      
              } else if (on_exit) {
                      struct pid *old_pgrp;
                      spin_lock_irq(&current->sighand->siglock);
                      old_pgrp = current->signal->tty_old_pgrp;
                      current->signal->tty_old_pgrp = NULL;
                      spin_unlock_irq(&current->sighand->siglock);
                      if (old_pgrp) {
                              kill_pgrp(old_pgrp, SIGHUP, on_exit);
                              kill_pgrp(old_pgrp, SIGCONT, on_exit);
                              put_pid(old_pgrp);
                      }
                      return;
              }
      
              spin_lock_irq(&current->sighand->siglock);
              put_pid(current->signal->tty_old_pgrp);
              current->signal->tty_old_pgrp = NULL;
      
              tty = tty_kref_get(current->signal->tty);
              if (tty) {
                      unsigned long flags;
                      spin_lock_irqsave(&tty->ctrl_lock, flags);
                      put_pid(tty->session);
                      put_pid(tty->pgrp);
                      tty->session = NULL;
                      tty->pgrp = NULL;
                      spin_unlock_irqrestore(&tty->ctrl_lock, flags);
                      tty_kref_put(tty);
              } else
                      tty_debug_hangup(tty, "no current tty\n");
      
              spin_unlock_irq(&current->sighand->siglock);
              /* Now clear signal->tty under the lock */
              read_lock(&tasklist_lock);
              session_clear_tty(task_session(current));
              read_unlock(&tasklist_lock);
      }
      
      /**
 *        no_tty        -        ensure the current process does not have a controlling tty
       */
      void no_tty(void)
      {
        /* FIXME: Review locking here. The tty_lock never covered any race
           between a new association and proc_clear_tty but possibly we need
           to protect against this anyway */
              struct task_struct *tsk = current;
              disassociate_ctty(0);
              proc_clear_tty(tsk);
      }
      
      
      /**
       *        stop_tty        -        propagate flow control
       *        @tty: tty to stop
       *
       *        Perform flow control to the driver. May be called
       *        on an already stopped device and will not re-call the driver
       *        method.
       *
       *        This functionality is used by both the line disciplines for
       *        halting incoming flow and by the driver. It may therefore be
       *        called from any context, may be under the tty atomic_write_lock
       *        but not always.
       *
       *        Locking:
       *                flow_lock
 */
      
    6 void __stop_tty(struct tty_struct *tty)
      {
    9         if (tty->stopped)
                      return;
    9         tty->stopped = 1;
              if (tty->ops->stop)
                      tty->ops->stop(tty);
      }
      
      void stop_tty(struct tty_struct *tty)
      {
    5         unsigned long flags;
    5         spin_lock_irqsave(&tty->flow_lock, flags);
              __stop_tty(tty);
              spin_unlock_irqrestore(&tty->flow_lock, flags);
      }
      EXPORT_SYMBOL(stop_tty);
      
      /**
       *        start_tty        -        propagate flow control
       *        @tty: tty to start
       *
       *        Start a tty that has been stopped if at all possible. If this
 *        tty was previously stopped and is now being started, the driver
       *        start method is invoked and the line discipline woken.
       *
       *        Locking:
       *                flow_lock
       */
      
   20 void __start_tty(struct tty_struct *tty)
      {
   20         if (!tty->stopped || tty->flow_stopped)
                      return;
   19         tty->stopped = 0;
   20         if (tty->ops->start)
                      tty->ops->start(tty);
              tty_wakeup(tty);
      }
      
      void start_tty(struct tty_struct *tty)
      {
   49         unsigned long flags;
   49         spin_lock_irqsave(&tty->flow_lock, flags);
              __start_tty(tty);
              spin_unlock_irqrestore(&tty->flow_lock, flags);
      }
      EXPORT_SYMBOL(start_tty);
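/*
 * Example (illustrative): software flow control in a line discipline
 * maps the received control characters onto these helpers, roughly:
 *
 *        if (c == STOP_CHAR(tty))
 *                stop_tty(tty);
 *        else if (c == START_CHAR(tty))
 *                start_tty(tty);
 *
 * Both take only flow_lock, so they are usable from the receive path.
 */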
      
   34 static void tty_update_time(struct timespec *time)
      {
              unsigned long sec = get_seconds();
      
              /*
         * We only care if the two values differ in anything other than the
         * lower three bits (i.e. every 8 seconds).  If so, then we can update
         * the time of the tty device, otherwise it could be construed as a
         * security leak to let userspace know the exact timing of the tty.
         */
              if ((sec ^ time->tv_sec) & ~7)
                      time->tv_sec = sec;
      }
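/*
 * Worked example: with time->tv_sec == 1000, get_seconds() == 1002
 * gives 1000 ^ 1002 == 2, which the ~7 mask clears, so the timestamp
 * is left alone; get_seconds() == 1008 gives 1000 ^ 1008 == 24, which
 * survives the mask, so tv_sec is updated.
 */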
      
      /**
       *        tty_read        -        read method for tty device files
       *        @file: pointer to tty file
       *        @buf: user buffer
       *        @count: size of user buffer
       *        @ppos: unused
       *
       *        Perform the read system call function on this terminal device. Checks
       *        for hung up devices before calling the line discipline method.
       *
       *        Locking:
       *                Locks the line discipline internally while needed. Multiple
       *        read calls may be outstanding in parallel.
       */
      
      static ssize_t tty_read(struct file *file, char __user *buf, size_t count,
                              loff_t *ppos)
   58 {
              int i;
              struct inode *inode = file_inode(file);
              struct tty_struct *tty = file_tty(file);
              struct tty_ldisc *ld;
      
   58         if (tty_paranoia_check(tty, inode, "tty_read"))
                      return -EIO;
              if (!tty || (test_bit(TTY_IO_ERROR, &tty->flags)))
                      return -EIO;
      
        /* We want to wait for the line discipline to sort out in this
                 situation */
   57         ld = tty_ldisc_ref_wait(tty);
              if (ld->ops->read)
                      i = ld->ops->read(tty, file, buf, count);
              else
                      i = -EIO;
   27         tty_ldisc_deref(ld);
              if (i > 0)
   50                 tty_update_time(&inode->i_atime);
      
              return i;
      }
      
   45 static void tty_write_unlock(struct tty_struct *tty)
      {
              mutex_unlock(&tty->atomic_write_lock);
              wake_up_interruptible_poll(&tty->write_wait, POLLOUT);
      }
      
   78 static int tty_write_lock(struct tty_struct *tty, int ndelay)
    9 {
              if (!mutex_trylock(&tty->atomic_write_lock)) {
    8                 if (ndelay)
                              return -EAGAIN;
                      if (mutex_lock_interruptible(&tty->atomic_write_lock))
   78                         return -ERESTARTSYS;
              }
              return 0;
      }
      
      /*
       * Split writes up in sane blocksizes to avoid
       * denial-of-service type attacks
       */
      static inline ssize_t do_tty_write(
              ssize_t (*write)(struct tty_struct *, struct file *, const unsigned char *, size_t),
              struct tty_struct *tty,
              struct file *file,
              const char __user *buf,
              size_t count)
      {
              ssize_t ret, written = 0;
   67         unsigned int chunk;
      
              ret = tty_write_lock(tty, file->f_flags & O_NDELAY);
              if (ret < 0)
                      return ret;
      
              /*
               * We chunk up writes into a temporary buffer. This
               * simplifies low-level drivers immensely, since they
               * don't have locking issues and user mode accesses.
               *
               * But if TTY_NO_WRITE_SPLIT is set, we should use a
               * big chunk-size..
               *
               * The default chunk-size is 2kB, because the NTTY
               * layer has problems with bigger chunks. It will
               * claim to be able to handle more characters than
               * it actually does.
               *
               * FIXME: This can probably go away now except that 64K chunks
               * are too likely to fail unless switched to vmalloc...
         */
              chunk = 2048;
   67         if (test_bit(TTY_NO_WRITE_SPLIT, &tty->flags))
   24                 chunk = 65536;
              if (count < chunk)
                      chunk = count;
              /* write_buf/write_cnt is protected by the atomic_write_lock mutex */
              if (tty->write_cnt < chunk) {
   59                 unsigned char *buf_chunk;
      
                      if (chunk < 1024)
                              chunk = 1024;
      
                      buf_chunk = kmalloc(chunk, GFP_KERNEL);
                      if (!buf_chunk) {
                              ret = -ENOMEM;
   59                         goto out;
                      }
                      kfree(tty->write_buf);
                      tty->write_cnt = chunk;
                      tty->write_buf = buf_chunk;
              }
      
              /* Do the write .. */
   71         for (;;) {
                      size_t size = count;
                      if (size > chunk)
                              size = chunk;
                      ret = -EFAULT;
   71                 if (copy_from_user(tty->write_buf, buf, size))
                              break;
                      ret = write(tty, file, tty->write_buf, size);
   54                 if (ret <= 0)
                              break;
                      written += ret;
                      buf += ret;
                      count -= ret;
                      if (!count)
   45                         break;
                      ret = -ERESTARTSYS;
   45                 if (signal_pending(current))
                              break;
   36                 cond_resched();
   28         }
              if (written) {
                      tty_update_time(&file_inode(file)->i_mtime);
                      ret = written;
   36         }
      out:
              tty_write_unlock(tty);
              return ret;
      }
      
      /**
       * tty_write_message - write a message to a certain tty, not just the console.
       * @tty: the destination tty_struct
       * @msg: the message to write
       *
       * This is used for messages that need to be redirected to a specific tty.
 * We don't put it into the syslog queue right now; maybe in the future if
 * really needed.
       *
       * We must still hold the BTM and test the CLOSING flag for the moment.
       */
      
      void tty_write_message(struct tty_struct *tty, char *msg)
      {
              if (tty) {
                      mutex_lock(&tty->atomic_write_lock);
                      tty_lock(tty);
                      if (tty->ops->write && tty->count > 0)
                              tty->ops->write(tty, msg, strlen(msg));
                      tty_unlock(tty);
                      tty_write_unlock(tty);
              }
              return;
      }
      
      
      /**
       *        tty_write                -        write method for tty device file
       *        @file: tty file pointer
       *        @buf: user data to write
       *        @count: bytes to write
       *        @ppos: unused
       *
       *        Write data to a tty device via the line discipline.
       *
       *        Locking:
       *                Locks the line discipline as required
       *                Writes to the tty driver are serialized by the atomic_write_lock
       *        and are then processed in chunks to the device. The line discipline
       *        write method will not be invoked in parallel for each device.
       */
      
      static ssize_t tty_write(struct file *file, const char __user *buf,
   67                                                 size_t count, loff_t *ppos)
      {
              struct tty_struct *tty = file_tty(file);
        struct tty_ldisc *ld;
              ssize_t ret;
      
   67         if (tty_paranoia_check(tty, file_inode(file), "tty_write"))
   67                 return -EIO;
              if (!tty || !tty->ops->write ||
                      (test_bit(TTY_IO_ERROR, &tty->flags)))
   67                         return -EIO;
              /* Short term debug to catch buggy drivers */
              if (tty->ops->write_room == NULL)
   67                 printk(KERN_ERR "tty driver %s lacks a write_room method.\n",
                              tty->driver->name);
              ld = tty_ldisc_ref_wait(tty);
              if (!ld->ops->write)
   71                 ret = -EIO;
   37         else
   37                 ret = do_tty_write(ld->ops->write, tty, file, buf, count);
              tty_ldisc_deref(ld);
              return ret;
      }
      
      ssize_t redirected_tty_write(struct file *file, const char __user *buf,
                                                      size_t count, loff_t *ppos)
      {
              struct file *p = NULL;
      
              spin_lock(&redirect_lock);
              if (redirect)
                      p = get_file(redirect);
              spin_unlock(&redirect_lock);
      
              if (p) {
                      ssize_t res;
                      res = vfs_write(p, buf, count, &p->f_pos);
                      fput(p);
                      return res;
              }
              return tty_write(file, buf, count, ppos);
      }
      
      /**
       *        tty_send_xchar        -        send priority character
       *
       *        Send a high priority character to the tty even if stopped
       *
       *        Locking: none for xchar method, write ordering for write method.
       */
      
   12 int tty_send_xchar(struct tty_struct *tty, char ch)
      {
   12         int        was_stopped = tty->stopped;
      
              if (tty->ops->send_xchar) {
                      down_read(&tty->termios_rwsem);
   11                 tty->ops->send_xchar(tty, ch);
                      up_read(&tty->termios_rwsem);
                      return 0;
              }
      
              if (tty_write_lock(tty, 0) < 0)
   11                 return -ERESTARTSYS;
      
    5         down_read(&tty->termios_rwsem);
    7         if (was_stopped)
                      start_tty(tty);
              tty->ops->write(tty, &ch, 1);
   10         if (was_stopped)
                      stop_tty(tty);
              up_read(&tty->termios_rwsem);
              tty_write_unlock(tty);
              return 0;
      }
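/*
 * Example (illustrative): the tcflow() TCION/TCIOFF path relies on
 * this helper to emit START_CHAR(tty)/STOP_CHAR(tty) even while
 * output is flow-stopped, which is why the stopped state is saved
 * and restored around the write above.
 */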
      
      static char ptychar[] = "pqrstuvwxyzabcde";
      
      /**
       *        pty_line_name        -        generate name for a pty
       *        @driver: the tty driver in use
       *        @index: the minor number
       *        @p: output buffer of at least 6 bytes
       *
       *        Generate a name from a driver reference and write it to the output
       *        buffer.
       *
       *        Locking: None
       */
      static void pty_line_name(struct tty_driver *driver, int index, char *p)
      {
              int i = index + driver->name_base;
              /* ->name is initialized to "ttyp", but "tty" is expected */
              sprintf(p, "%s%c%x",
                      driver->subtype == PTY_TYPE_SLAVE ? "tty" : driver->name,
                      ptychar[i >> 4 & 0xf], i & 0xf);
      }
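/*
 * Worked example: with name_base 0, index 0 produces "ptyp0" on the
 * master side and "ttyp0" on the slave side; index 31 selects
 * ptychar[1] == 'q' and low nibble 0xf, giving "ptyqf"/"ttyqf".
 */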
      
      /**
       *        tty_line_name        -        generate name for a tty
       *        @driver: the tty driver in use
       *        @index: the minor number
       *        @p: output buffer of at least 7 bytes
       *
       *        Generate a name from a driver reference and write it to the output
       *        buffer.
       *
 *        Locking: None
       */
  155 static ssize_t tty_line_name(struct tty_driver *driver, int index, char *p)
  155 {
              if (driver->flags & TTY_DRIVER_UNNUMBERED_NODE)
                      return sprintf(p, "%s", driver->name);
  155         else
                      return sprintf(p, "%s%d", driver->name,
                                     index + driver->name_base);
      }
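/*
 * Worked example: for a driver named "ttyS" with name_base 0, index 3
 * produces "ttyS3"; a TTY_DRIVER_UNNUMBERED_NODE driver gets just its
 * bare name, as used for single-node devices.
 */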
      
      /**
       *        tty_driver_lookup_tty() - find an existing tty, if any
       *        @driver: the driver for the tty
       *        @idx:         the minor number
       *
       *        Return the tty, if found. If not found, return NULL or ERR_PTR() if the
       *        driver lookup() method returns an error.
       *
       *        Locking: tty_mutex must be held. If the tty is found, bump the tty kref.
       */
      static struct tty_struct *tty_driver_lookup_tty(struct tty_driver *driver,
                      struct inode *inode, int idx)
      {
              struct tty_struct *tty;
              if (driver->ops->lookup)
                      tty = driver->ops->lookup(driver, inode, idx);
              else
   51                 tty = driver->ttys[idx];
              if (!IS_ERR(tty))
                      tty_kref_get(tty);
              return tty;
      }
      
      /**
       *        tty_init_termios        -  helper for termios setup
       *        @tty: the tty to set up
       *
 *        Initialise the termios structures for this tty. This runs under
       *        the tty_mutex currently so we can be relaxed about ordering.
       */
      
      int tty_init_termios(struct tty_struct *tty)
      {
              struct ktermios *tp;
              int idx = tty->index;
      
              if (tty->driver->flags & TTY_DRIVER_RESET_TERMIOS)
                      tty->termios = tty->driver->init_termios;
              else {
                      /* Check for lazy saved data */
                      tp = tty->driver->termios[idx];
                      if (tp != NULL)
                              tty->termios = *tp;
                      else
                              tty->termios = tty->driver->init_termios;
              }
              /* Compatibility until drivers always set this */
              tty->termios.c_ispeed = tty_termios_input_baud_rate(&tty->termios);
              tty->termios.c_ospeed = tty_termios_baud_rate(&tty->termios);
              return 0;
      }
      EXPORT_SYMBOL_GPL(tty_init_termios);
      
      int tty_standard_install(struct tty_driver *driver, struct tty_struct *tty)
      {
              int ret = tty_init_termios(tty);
              if (ret)
                      return ret;
      
              tty_driver_kref_get(driver);
              tty->count++;
              driver->ttys[tty->index] = tty;
              return 0;
      }
      EXPORT_SYMBOL_GPL(tty_standard_install);
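/*
 * Example (illustrative sketch): a driver needing extra per-tty setup
 * wraps this helper from its own install op (my_ports is a
 * hypothetical per-driver table):
 *
 *        static int my_install(struct tty_driver *driver,
 *                              struct tty_struct *tty)
 *        {
 *                tty->port = &my_ports[tty->index];
 *                return tty_standard_install(driver, tty);
 *        }
 */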
      
      /**
       *        tty_driver_install_tty() - install a tty entry in the driver
       *        @driver: the driver for the tty
       *        @tty: the tty
       *
       *        Install a tty object into the driver tables. The tty->index field
       *        will be set by the time this is called. This method is responsible
 *        for ensuring that any needed additional structures are allocated and
       *        configured.
       *
       *        Locking: tty_mutex for now
       */
      static int tty_driver_install_tty(struct tty_driver *driver,
  155                                                 struct tty_struct *tty)
      {
              return driver->ops->install ? driver->ops->install(driver, tty) :
                      tty_standard_install(driver, tty);
      }
      
      /**
       *        tty_driver_remove_tty() - remove a tty from the driver tables
       *        @driver: the driver for the tty
       *        @idx:         the minor number
       *
 *        Remove a tty object from the driver tables. The tty->index field
       *        will be set by the time this is called.
       *
       *        Locking: tty_mutex for now
       */
   20 void tty_driver_remove_tty(struct tty_driver *driver, struct tty_struct *tty)
   20 {
              if (driver->ops->remove)
                      driver->ops->remove(driver, tty);
   20         else
                      driver->ttys[tty->index] = NULL;
      }
      
      /*
       *         tty_reopen()        - fast re-open of an open tty
       *         @tty        - the tty to open
       *
       *        Return 0 on success, -errno on error.
       *        Re-opens on master ptys are not allowed and return -EIO.
       *
       *        Locking: Caller must hold tty_lock
       */
   50 static int tty_reopen(struct tty_struct *tty)
      {
              struct tty_driver *driver = tty->driver;
      
              if (driver->type == TTY_DRIVER_TYPE_PTY &&
                  driver->subtype == PTY_TYPE_MASTER)
   50                 return -EIO;
      
              if (!tty->count)
   50                 return -EAGAIN;
      
              if (test_bit(TTY_EXCLUSIVE, &tty->flags) && !capable(CAP_SYS_ADMIN))
   48                 return -EBUSY;
      
   50         tty->count++;
      
              WARN_ON(!tty->ldisc);
      
              return 0;
      }
      
      /**
       *        tty_init_dev                -        initialise a tty device
       *        @driver: tty driver we are opening a device on
       *        @idx: device index
       *        @ret_tty: returned tty structure
       *
       *        Prepare a tty device. This may not be a "new" clean device but
       *        could also be an active device. The pty drivers require special
       *        handling because of this.
       *
       *        Locking:
       *                The function is called under the tty_mutex, which
       *        protects us from the tty struct or driver itself going away.
       *
       *        On exit the tty device has the line discipline attached and
       *        a reference count of 1. If a pair was created for pty/tty use
       *        and the other was a pty master then it too has a reference count of 1.
       *
       * WSH 06/09/97: Rewritten to remove races and properly clean up after a
       * failed open.  The new code protects the open with a mutex, so it's
       * really quite straightforward.  The mutex locking can probably be
       * relaxed for the (most common) case of reopening a tty.
 */
      
      struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx)
      {
              struct tty_struct *tty;
              int retval;
      
              /*
               * First time open is complex, especially for PTY devices.
               * This code guarantees that either everything succeeds and the
               * TTY is ready for operation, or else the table slots are vacated
               * and the allocated memory released.  (Except that the termios
               * and locked termios may be retained.)
         */
      
              if (!try_module_get(driver->owner))
  155                 return ERR_PTR(-ENODEV);
      
              tty = alloc_tty_struct(driver, idx);
              if (!tty) {
                      retval = -ENOMEM;
                      goto err_module_put;
  155         }
  153         tty_lock(tty);
              retval = tty_driver_install_tty(driver, tty);
              if (retval < 0)
  153                 goto err_deinit_tty;
      
              if (!tty->port)
                      tty->port = driver->ports[idx];
      
              WARN_RATELIMIT(!tty->port,
                              "%s: %s driver does not set tty->port. This will crash the kernel later. Fix the driver!\n",
  153                         __func__, tty->driver->name);
      
              tty->port->itty = tty;
      
              /*
               * Structures all installed ... call the ldisc open routines.
               * If we fail here just call release_tty to clean up.  No need
               * to decrement the use counts, as release_tty doesn't care.
               */
              retval = tty_ldisc_setup(tty, tty->link);
              if (retval)
                      goto err_release_tty;
              /* Return the tty locked so that it cannot vanish under the caller */
              return tty;
      
      err_deinit_tty:
              tty_unlock(tty);
              deinitialize_tty_struct(tty);
              free_tty_struct(tty);
      err_module_put:
              module_put(driver->owner);
              return ERR_PTR(retval);
      
              /* call the tty release_tty routine to clean out this slot */
      err_release_tty:
              tty_unlock(tty);
              printk_ratelimited(KERN_INFO "tty_init_dev: ldisc open failed, "
                                       "clearing slot %d\n", idx);
              release_tty(tty, idx);
              return ERR_PTR(retval);
      }
      
      void tty_free_termios(struct tty_struct *tty)
      {
              struct ktermios *tp;
              int idx = tty->index;
              /* If the port is going to reset then it has no termios to save */
              if (tty->driver->flags & TTY_DRIVER_RESET_TERMIOS)
                      return;
      
              /* Stash the termios data */
              tp = tty->driver->termios[idx];
              if (tp == NULL) {
                      tp = kmalloc(sizeof(struct ktermios), GFP_KERNEL);
                      if (tp == NULL) {
                              pr_warn("tty: no memory to save termios state.\n");
                              return;
                      }
   20                 tty->driver->termios[idx] = tp;
              }
              *tp = tty->termios;
      }
      EXPORT_SYMBOL(tty_free_termios);
      
      /**
       *        tty_flush_works                -        flush all works of a tty/pty pair
       *        @tty: tty device to flush works for (or either end of a pty pair)
       *
       *        Sync flush all works belonging to @tty (and the 'other' tty).
       */
      static void tty_flush_works(struct tty_struct *tty)
      {
              flush_work(&tty->SAK_work);
   21         flush_work(&tty->hangup_work);
              if (tty->link) {
                      flush_work(&tty->link->SAK_work);
                      flush_work(&tty->link->hangup_work);
              }
      }
      
      /**
       *        release_one_tty                -        release tty structure memory
       *        @kref: kref of tty we are obliterating
       *
       *        Releases memory associated with a tty structure, and clears out the
       *        driver table slots. This function is called when a device is no longer
       *        in use. It also gets called when setup of a device fails.
       *
       *        Locking:
       *                takes the file list lock internally when working on the list
       *        of ttys that the driver keeps.
       *
       *        This method gets called from a work queue so that the driver private
       *        cleanup ops can sleep (needed for USB at least)
       */
      static void release_one_tty(struct work_struct *work)
      {
              struct tty_struct *tty =
                      container_of(work, struct tty_struct, hangup_work);
              struct tty_driver *driver = tty->driver;
              struct module *owner = driver->owner;
      
              if (tty->ops->cleanup)
                      tty->ops->cleanup(tty);
      
              tty->magic = 0;
              tty_driver_kref_put(driver);
              module_put(owner);
      
              spin_lock(&tty_files_lock);
              list_del_init(&tty->tty_files);
              spin_unlock(&tty_files_lock);
      
              put_pid(tty->pgrp);
              put_pid(tty->session);
              free_tty_struct(tty);
      }
      
      static void queue_release_one_tty(struct kref *kref)
      {
              struct tty_struct *tty = container_of(kref, struct tty_struct, kref);
      
        /* The hangup queue is now free so we can reuse it rather than
                 waste a chunk of memory for each port */
              INIT_WORK(&tty->hangup_work, release_one_tty);
              schedule_work(&tty->hangup_work);
      }
      
      /**
       *        tty_kref_put                -        release a tty kref
       *        @tty: tty device
       *
       *        Release a reference to a tty device and if need be let the kref
       *        layer destruct the object for us
       */
      
  254 void tty_kref_put(struct tty_struct *tty)
  226 {
  254         if (tty)
                      kref_put(&tty->kref, queue_release_one_tty);
      }
      EXPORT_SYMBOL(tty_kref_put);
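/*
 * Example (illustrative): code that must keep a tty alive across a
 * sleep pairs this with tty_kref_get() (do_something is hypothetical):
 *
 *        struct tty_struct *tty = tty_kref_get(some_tty);
 *        if (tty) {
 *                do_something(tty);
 *                tty_kref_put(tty);
 *        }
 */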
      
      /**
        *        release_tty                -        release tty structure memory
        *        @tty: tty device to release
        *        @idx: index of the tty device
        *
       *        Release both @tty and a possible linked partner (think pty pair),
       *        and decrement the refcount of the backing module.
       *
       *        Locking:
       *                tty_mutex
       *                takes the file list lock internally when working on the list
       *        of ttys that the driver keeps.
       *
       */
      static void release_tty(struct tty_struct *tty, int idx)
       {
               /* This should always be true but check for the moment */
               WARN_ON(tty->index != idx);
               WARN_ON(!mutex_is_locked(&tty_mutex));
               if (tty->ops->shutdown)
                       tty->ops->shutdown(tty);
               tty_free_termios(tty);
               tty_driver_remove_tty(tty->driver, tty);
               tty->port->itty = NULL;
               if (tty->link)
                       tty->link->port->itty = NULL;
               tty_buffer_cancel_work(tty->port);
               if (tty->link)
                       tty_buffer_cancel_work(tty->link->port);
      
              tty_kref_put(tty->link);
              tty_kref_put(tty);
      }
      
      /**
       *        tty_release_checks - check a tty before real release
       *        @tty: tty to check
       *        @idx: index of the tty
       *
       *        Performs some paranoid checking before true release of the @tty.
       *        This is a no-op unless TTY_PARANOIA_CHECK is defined.
       */
      static int tty_release_checks(struct tty_struct *tty, int idx)
       {
       #ifdef TTY_PARANOIA_CHECK
               if (idx < 0 || idx >= tty->driver->num) {
                       tty_debug(tty, "bad idx %d\n", idx);
                       return -1;
               }

               /* not much to check for devpts */
              if (tty->driver->flags & TTY_DRIVER_DEVPTS_MEM)
                      return 0;
      
              if (tty != tty->driver->ttys[idx]) {
                      tty_debug(tty, "bad driver table[%d] = %p\n",
                                idx, tty->driver->ttys[idx]);
                      return -1;
              }
              if (tty->driver->other) {
                      struct tty_struct *o_tty = tty->link;
      
                      if (o_tty != tty->driver->other->ttys[idx]) {
                              tty_debug(tty, "bad other table[%d] = %p\n",
                                        idx, tty->driver->other->ttys[idx]);
                              return -1;
                      }
                      if (o_tty->link != tty) {
                              tty_debug(tty, "bad link = %p\n", o_tty->link);
                              return -1;
                      }
              }
      #endif
              return 0;
      }
      
      /**
       *        tty_release                -        vfs callback for close
       *        @inode: inode of tty
       *        @filp: file pointer for handle to tty
       *
       *        Called the last time each file handle is closed that references
       *        this tty. There may however be several such references.
       *
       *        Locking:
        *                Takes tty_lock(). See release_tty() for the final teardown.
       *
       * Even releasing the tty structures is a tricky business.. We have
       * to be very careful that the structures are all released at the
       * same time, as interrupts might otherwise get the wrong pointers.
       *
       * WSH 09/09/97: rewritten to avoid some nasty race conditions that could
       * lead to double frees or releasing memory still in use.
       */
      
       int tty_release(struct inode *inode, struct file *filp)
       {
               struct tty_struct *tty = file_tty(filp);
               struct tty_struct *o_tty = NULL;
               int        do_sleep, final;
               int        idx;
               long        timeout = 0;
               int        once = 1;

               if (tty_paranoia_check(tty, inode, __func__))
                       return 0;

               tty_lock(tty);
               check_tty_count(tty, __func__);

               __tty_fasync(-1, filp, 0);

               idx = tty->index;
               if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
                   tty->driver->subtype == PTY_TYPE_MASTER)
                       o_tty = tty->link;

               if (tty_release_checks(tty, idx)) {
                       tty_unlock(tty);
                       return 0;
               }

               tty_debug_hangup(tty, "(tty count=%d)...\n", tty->count);

               if (tty->ops->close)
                       tty->ops->close(tty, filp);

               /* If tty is pty master, lock the slave pty (stable lock order) */
              tty_lock_slave(o_tty);
      
              /*
               * Sanity check: if tty->count is going to zero, there shouldn't be
               * any waiters on tty->read_wait or tty->write_wait.  We test the
               * wait queues and kick everyone out _before_ actually starting to
               * close.  This ensures that we won't block while releasing the tty
               * structure.
               *
               * The test for the o_tty closing is necessary, since the master and
               * slave sides may close in any order.  If the slave side closes out
               * first, its count will be one, since the master side holds an open.
               * Thus this test wouldn't be triggered at the time the slave closed,
               * so we do it now.
               */
               while (1) {
                       do_sleep = 0;

                       if (tty->count <= 1) {
                               if (waitqueue_active(&tty->read_wait)) {
                                       wake_up_poll(&tty->read_wait, POLLIN);
                                       do_sleep++;
                               }
                               if (waitqueue_active(&tty->write_wait)) {
                                       wake_up_poll(&tty->write_wait, POLLOUT);
                                       do_sleep++;
                               }
                       }
                       if (o_tty && o_tty->count <= 1) {
                               if (waitqueue_active(&o_tty->read_wait)) {
                                       wake_up_poll(&o_tty->read_wait, POLLIN);
                                       do_sleep++;
                               }
                               if (waitqueue_active(&o_tty->write_wait)) {
                                       wake_up_poll(&o_tty->write_wait, POLLOUT);
                                       do_sleep++;
                               }
                       }
                       if (!do_sleep)
                               break;

                       if (once) {
                               once = 0;
                               printk(KERN_WARNING "%s: %s: read/write wait queue active!\n",
                                      __func__, tty_name(tty));
                       }
                       schedule_timeout_killable(timeout);
                       if (timeout < 120 * HZ)
                               timeout = 2 * timeout + 1;
                       else
                               timeout = MAX_SCHEDULE_TIMEOUT;
               }

               if (o_tty) {
                       if (--o_tty->count < 0) {
                               printk(KERN_WARNING "%s: bad pty slave count (%d) for %s\n",
                                       __func__, o_tty->count, tty_name(o_tty));
                               o_tty->count = 0;
                       }
               }
               if (--tty->count < 0) {
                       printk(KERN_WARNING "%s: bad tty->count (%d) for %s\n",
                                       __func__, tty->count, tty_name(tty));
                       tty->count = 0;
               }
      
              /*
               * We've decremented tty->count, so we need to remove this file
               * descriptor off the tty->tty_files list; this serves two
               * purposes:
               *  - check_tty_count sees the correct number of file descriptors
               *    associated with this tty.
               *  - do_tty_hangup no longer sees this file descriptor as
                *    something that needs to be handled for hangups.
               */
              tty_del_file(filp);
      
              /*
               * Perform some housekeeping before deciding whether to return.
               *
               * If _either_ side is closing, make sure there aren't any
               * processes that still think tty or o_tty is their controlling
               * tty.
                */
               if (!tty->count) {
                       read_lock(&tasklist_lock);
                       session_clear_tty(tty->session);
                       if (o_tty)
                               session_clear_tty(o_tty->session);
                       read_unlock(&tasklist_lock);
               }

               /* check whether both sides are closing ... */
               final = !tty->count && !(o_tty && o_tty->count);
      
              tty_unlock_slave(o_tty);
              tty_unlock(tty);
      
              /* At this point, the tty->count == 0 should ensure a dead tty
                 cannot be re-opened by a racing opener */
      
              if (!final)
                      return 0;
      
              tty_debug_hangup(tty, "final close\n");
               /*
                * Ask the line discipline code to release its structures
                */
               tty_ldisc_release(tty);

               /* Wait for pending work before tty destruction commences */
               tty_flush_works(tty);
      
              tty_debug_hangup(tty, "freeing structure...\n");
              /*
               * The release_tty function takes care of the details of clearing
               * the slots and preserving the termios structure. The tty_unlock_pair
               * should be safe as we keep a kref while the tty is locked (so the
                * unlock never unlocks a freed tty).
               */
              mutex_lock(&tty_mutex);
              release_tty(tty, idx);
              mutex_unlock(&tty_mutex);
      
              return 0;
      }
      
      /**
       *        tty_open_current_tty - get locked tty of current task
       *        @device: device number
       *        @filp: file pointer to tty
       *        @return: locked tty of the current task iff @device is /dev/tty
       *
       *        Performs a re-open of the current task's controlling tty.
       *
       *        We cannot return driver and index like for the other nodes because
       *        devpts will not work then. It expects inodes to be from devpts FS.
       */
      static struct tty_struct *tty_open_current_tty(dev_t device, struct file *filp)
      {
              struct tty_struct *tty;
              int retval;
      
              if (device != MKDEV(TTYAUX_MAJOR, 0))
                       return NULL;
      
              tty = get_current_tty();
              if (!tty)
                      return ERR_PTR(-ENXIO);
      
              filp->f_flags |= O_NONBLOCK; /* Don't let /dev/tty block */
              /* noctty = 1; */
              tty_lock(tty);
              tty_kref_put(tty);        /* safe to drop the kref now */
      
              retval = tty_reopen(tty);
              if (retval < 0) {
                      tty_unlock(tty);
                      tty = ERR_PTR(retval);
              }
              return tty;
      }
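
       /*
        * User-space sketch (illustrative only): opening /dev/tty always
        * refers back to the caller's controlling terminal, which is the
        * re-open path handled above:
        *
        *        int fd = open("/dev/tty", O_RDWR);
        *
        * This fails with ENXIO when the caller has no controlling tty,
        * matching the get_current_tty() check above.
        */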
      
      /**
       *        tty_lookup_driver - lookup a tty driver for a given device file
       *        @device: device number
       *        @filp: file pointer to tty
       *        @noctty: set if the device should not become a controlling tty
       *        @index: index for the device in the @return driver
       *        @return: driver for this inode (with increased refcount)
       *
        *        If @return is not erroneous, the caller is responsible for
        *        decrementing the refcount via tty_driver_kref_put().
       *
       *        Locking: tty_mutex protects get_tty_driver
       */
      static struct tty_driver *tty_lookup_driver(dev_t device, struct file *filp,
                      int *noctty, int *index)
      {
              struct tty_driver *driver;
      
              switch (device) {
      #ifdef CONFIG_VT
              case MKDEV(TTY_MAJOR, 0): {
                      extern struct tty_driver *console_driver;
                      driver = tty_driver_kref_get(console_driver);
                      *index = fg_console;
                      *noctty = 1;
                      break;
              }
      #endif
              case MKDEV(TTYAUX_MAJOR, 1): {
                      struct tty_driver *console_driver = console_device(index);
                      if (console_driver) {
                              driver = tty_driver_kref_get(console_driver);
                              if (driver) {
                                      /* Don't let /dev/console block */
                                      filp->f_flags |= O_NONBLOCK;
                                      *noctty = 1;
                                      break;
                              }
                      }
                      return ERR_PTR(-ENODEV);
               }
               default:
                      driver = get_tty_driver(device, index);
                      if (!driver)
                              return ERR_PTR(-ENODEV);
                      break;
              }
              return driver;
      }
      
      /**
       *        tty_open                -        open a tty device
       *        @inode: inode of device file
       *        @filp: file pointer to tty
       *
       *        tty_open and tty_release keep up the tty count that contains the
       *        number of opens done on a tty. We cannot use the inode-count, as
       *        different inodes might point to the same tty.
       *
       *        Open-counting is needed for pty masters, as well as for keeping
       *        track of serial lines: DTR is dropped when the last close happens.
       *        (This is not done solely through tty->count, now.  - Ted 1/27/92)
       *
       *        The termios state of a pty is reset on first open so that
       *        settings don't persist across reuse.
       *
       *        Locking: tty_mutex protects tty, tty_lookup_driver and tty_init_dev.
       *                 tty->count should protect the rest.
       *                 ->siglock protects ->signal/->sighand
       *
       *        Note: the tty_unlock/lock cases without a ref are only safe due to
       *        tty_mutex
       */
      
      static int tty_open(struct inode *inode, struct file *filp)
      {
              struct tty_struct *tty;
              int noctty, retval;
               struct tty_driver *driver = NULL;
               int index;
               dev_t device = inode->i_rdev;
               unsigned saved_flags = filp->f_flags;

               nonseekable_open(inode, filp);

       retry_open:
               retval = tty_alloc_file(filp);
               if (retval)
                       return -ENOMEM;

               noctty = filp->f_flags & O_NOCTTY;
               index  = -1;
               retval = 0;

               tty = tty_open_current_tty(device, filp);
               if (!tty) {
                       mutex_lock(&tty_mutex);
                      driver = tty_lookup_driver(device, filp, &noctty, &index);
                      if (IS_ERR(driver)) {
                              retval = PTR_ERR(driver);
                              goto err_unlock;
                       }

                       /* check whether we're reopening an existing tty */
                       tty = tty_driver_lookup_tty(driver, inode, index);
                       if (IS_ERR(tty)) {
                               retval = PTR_ERR(tty);
                               goto err_unlock;
                       }

                       if (tty) {
                               mutex_unlock(&tty_mutex);
                               retval = tty_lock_interruptible(tty);
                               tty_kref_put(tty);  /* drop kref from tty_driver_lookup_tty() */
                               if (retval) {
                                       if (retval == -EINTR)
                                               retval = -ERESTARTSYS;
                                       goto err_unref;
                               }
                               retval = tty_reopen(tty);
                               if (retval < 0) {
                                       tty_unlock(tty);
                                       tty = ERR_PTR(retval);
                               }
                       } else { /* Returns with the tty_lock held for now */
                               tty = tty_init_dev(driver, index);
                               mutex_unlock(&tty_mutex);
                       }

                       tty_driver_kref_put(driver);
               }

               if (IS_ERR(tty)) {
                      retval = PTR_ERR(tty);
                      if (retval != -EAGAIN || signal_pending(current))
                              goto err_file;
                      tty_free_file(filp);
                      schedule();
                      goto retry_open;
               }

               tty_add_file(tty, filp);

               check_tty_count(tty, __func__);
               if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
                   tty->driver->subtype == PTY_TYPE_MASTER)
                       noctty = 1;

               tty_debug_hangup(tty, "(tty count=%d)\n", tty->count);

               if (tty->ops->open)
                       retval = tty->ops->open(tty, filp);
               else
                       retval = -ENODEV;
               filp->f_flags = saved_flags;

               if (retval) {
                       tty_debug_hangup(tty, "error %d, releasing...\n", retval);

                       tty_unlock(tty); /* need to call tty_release without BTM */
                      tty_release(inode, filp);
                      if (retval != -ERESTARTSYS)
                              return retval;
      
                      if (signal_pending(current))
                              return retval;
      
                      schedule();
                      /*
                       * Need to reset f_op in case a hangup happened.
                       */
                      if (tty_hung_up_p(filp))
                              filp->f_op = &tty_fops;
                       goto retry_open;
               }
               clear_bit(TTY_HUPPED, &tty->flags);

               read_lock(&tasklist_lock);
               spin_lock_irq(&current->sighand->siglock);
              if (!noctty &&
                  current->signal->leader &&
                  !current->signal->tty &&
                  tty->session == NULL) {
                      /*
                       * Don't let a process that only has write access to the tty
                       * obtain the privileges associated with having a tty as
                       * controlling terminal (being able to reopen it with full
                       * access through /dev/tty, being able to perform pushback).
                       * Many distributions set the group of all ttys to "tty" and
                       * grant write-only access to all terminals for setgid tty
                       * binaries, which should not imply full privileges on all ttys.
                       *
                       * This could theoretically break old code that performs open()
                       * on a write-only file descriptor. In that case, it might be
                       * necessary to also permit this if
                       * inode_permission(inode, MAY_READ) == 0.
                       */
                      if (filp->f_mode & FMODE_READ)
                               __proc_set_tty(tty);
              }
              spin_unlock_irq(&current->sighand->siglock);
              read_unlock(&tasklist_lock);
              tty_unlock(tty);
              return 0;
      err_unlock:
              mutex_unlock(&tty_mutex);
      err_unref:
               /* after locks to avoid deadlock */
               if (!IS_ERR_OR_NULL(driver))
                       tty_driver_kref_put(driver);
       err_file:
               tty_free_file(filp);
              return retval;
      }
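
       /*
        * User-space sketch (illustrative only): a process that must not
        * accidentally acquire a controlling terminal opens the device with
        * O_NOCTTY, which sets the noctty path above:
        *
        *        int fd = open("/dev/ttyS0", O_RDWR | O_NOCTTY | O_NONBLOCK);
        */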
      
      
      
      /**
       *        tty_poll        -        check tty status
       *        @filp: file being polled
       *        @wait: poll wait structures to update
       *
       *        Call the line discipline polling method to obtain the poll
       *        status of the device.
       *
       *        Locking: locks called line discipline but ldisc poll method
       *        may be re-entered freely by other callers.
       */
      
       static unsigned int tty_poll(struct file *filp, poll_table *wait)
       {
               struct tty_struct *tty = file_tty(filp);
               struct tty_ldisc *ld;
               int ret = 0;

               if (tty_paranoia_check(tty, file_inode(filp), "tty_poll"))
                       return 0;

               ld = tty_ldisc_ref_wait(tty);
               if (ld->ops->poll)
                      ret = ld->ops->poll(tty, filp, wait);
              tty_ldisc_deref(ld);
              return ret;
      }
      
       static int __tty_fasync(int fd, struct file *filp, int on)
       {
               struct tty_struct *tty = file_tty(filp);
               struct tty_ldisc *ldisc;
               unsigned long flags;
               int retval = 0;

               if (tty_paranoia_check(tty, file_inode(filp), "tty_fasync"))
                       goto out;

               retval = fasync_helper(fd, filp, on, &tty->fasync);
               if (retval <= 0)
                       goto out;

               ldisc = tty_ldisc_ref(tty);
               if (ldisc) {
                       if (ldisc->ops->fasync)
                               ldisc->ops->fasync(tty, on);
                       tty_ldisc_deref(ldisc);
               }

               if (on) {
                       enum pid_type type;
                       struct pid *pid;

                       spin_lock_irqsave(&tty->ctrl_lock, flags);
                       if (tty->pgrp) {
                               pid = tty->pgrp;
                               type = PIDTYPE_PGID;
                       } else {
                               pid = task_pid(current);
                               type = PIDTYPE_PID;
                       }
                       get_pid(pid);
                       spin_unlock_irqrestore(&tty->ctrl_lock, flags);
                       __f_setown(filp, pid, type, 0);
                       put_pid(pid);
                       retval = 0;
               }
      out:
              return retval;
      }
      
       static int tty_fasync(int fd, struct file *filp, int on)
      {
              struct tty_struct *tty = file_tty(filp);
              int retval;
      
              tty_lock(tty);
              retval = __tty_fasync(fd, filp, on);
              tty_unlock(tty);
      
              return retval;
      }
      
      /**
       *        tiocsti                        -        fake input character
       *        @tty: tty to fake input into
       *        @p: pointer to character
       *
       *        Fake input to a tty device. Does the necessary locking and
       *        input management.
       *
       *        FIXME: does not honour flow control ??
       *
       *        Locking:
       *                Called functions take tty_ldiscs_lock
       *                current->signal->tty check is safe without locks
       *
       *        FIXME: may race normal receive processing
       */
      
       static int tiocsti(struct tty_struct *tty, char __user *p)
       {
               char ch, mbz = 0;
               struct tty_ldisc *ld;

               if ((current->signal->tty != tty) && !capable(CAP_SYS_ADMIN))
                       return -EPERM;
               if (get_user(ch, p))
                       return -EFAULT;
               tty_audit_tiocsti(tty, ch);
               ld = tty_ldisc_ref_wait(tty);
               if (ld->ops->receive_buf)
                       ld->ops->receive_buf(tty, &ch, &mbz, 1);
              tty_ldisc_deref(ld);
              return 0;
      }
      
      /**
       *        tiocgwinsz                -        implement window query ioctl
        *        @tty: tty
       *        @arg: user buffer for result
       *
       *        Copies the kernel idea of the window size into the user buffer.
       *
       *        Locking: tty->winsize_mutex is taken to ensure the winsize data
       *                is consistent.
       */
      
      static int tiocgwinsz(struct tty_struct *tty, struct winsize __user *arg)
      {
               int err;
      
              mutex_lock(&tty->winsize_mutex);
              err = copy_to_user(arg, &tty->winsize, sizeof(*arg));
              mutex_unlock(&tty->winsize_mutex);
      
              return err ? -EFAULT: 0;
      }
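
       /*
        * User-space sketch (illustrative only): the matching ioctl reads the
        * window size that this helper copies out:
        *
        *        struct winsize ws;
        *        if (ioctl(STDIN_FILENO, TIOCGWINSZ, &ws) == 0)
        *                printf("%hu rows x %hu cols\n", ws.ws_row, ws.ws_col);
        */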
      
      /**
        *        tty_do_resize                -        resize event
        *        @tty: tty being resized
        *        @ws: requested window size (rows and columns, in characters)
        *
        *        Update the termios variables and send the necessary signals to
        *        perform a terminal resize correctly.
       */
      
      int tty_do_resize(struct tty_struct *tty, struct winsize *ws)
      {
               struct pid *pgrp;

               /* Lock the winsize data */
               mutex_lock(&tty->winsize_mutex);
               if (!memcmp(ws, &tty->winsize, sizeof(*ws)))
                       goto done;

               /* Signal the foreground process group */
               pgrp = tty_get_pgrp(tty);
               if (pgrp)
                       kill_pgrp(pgrp, SIGWINCH, 1);
               put_pid(pgrp);

               tty->winsize = *ws;
      done:
              mutex_unlock(&tty->winsize_mutex);
              return 0;
      }
      EXPORT_SYMBOL(tty_do_resize);
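
       /*
        * Illustrative sketch (hypothetical driver, not from this file): a
        * driver with no hardware notion of window size simply forwards to
        * tty_do_resize(), which is what tiocswinsz() below does by default:
        *
        *        static int example_resize(struct tty_struct *tty, struct winsize *ws)
        *        {
        *                return tty_do_resize(tty, ws);
        *        }
        */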
      
      /**
       *        tiocswinsz                -        implement window size set ioctl
        *        @tty: tty side of tty
       *        @arg: user buffer for result
       *
       *        Copies the user idea of the window size to the kernel. Traditionally
       *        this is just advisory information but for the Linux console it
       *        actually has driver level meaning and triggers a VC resize.
       *
       *        Locking:
       *                Driver dependent. The default do_resize method takes the
       *        tty termios mutex and ctrl_lock. The console takes its own lock
       *        then calls into the default method.
       */
      
      static int tiocswinsz(struct tty_struct *tty, struct winsize __user *arg)
       {
               struct winsize tmp_ws;
               if (copy_from_user(&tmp_ws, arg, sizeof(*arg)))
                       return -EFAULT;

               if (tty->ops->resize)
                       return tty->ops->resize(tty, &tmp_ws);
              else
                      return tty_do_resize(tty, &tmp_ws);
      }
      
      /**
       *        tioccons        -        allow admin to move logical console
       *        @file: the file to become console
       *
       *        Allow the administrator to move the redirected console device
       *
       *        Locking: uses redirect_lock to guard the redirect information
       */
      
       static int tioccons(struct file *file)
      {
              if (!capable(CAP_SYS_ADMIN))
                      return -EPERM;
              if (file->f_op->write == redirected_tty_write) {
                      struct file *f;
                      spin_lock(&redirect_lock);
                      f = redirect;
                      redirect = NULL;
                      spin_unlock(&redirect_lock);
                      if (f)
                              fput(f);
                      return 0;
              }
              spin_lock(&redirect_lock);
              if (redirect) {
                      spin_unlock(&redirect_lock);
                      return -EBUSY;
              }
              redirect = get_file(file);
              spin_unlock(&redirect_lock);
              return 0;
      }
      
      /**
       *        fionbio                -        non blocking ioctl
       *        @file: file to set blocking value
       *        @p: user parameter
       *
        *        Historical tty interfaces had a blocking control ioctl before
        *        the generic functionality existed. This piece of history is
        *        preserved in the tty API expected on POSIX systems.
       *
       *        Locking: none, the open file handle ensures it won't go away.
       */
      
      static int fionbio(struct file *file, int __user *p)
      {
              int nonblock;
      
              if (get_user(nonblock, p))
                      return -EFAULT;
      
              spin_lock(&file->f_lock);
              if (nonblock)
                      file->f_flags |= O_NONBLOCK;
              else
                      file->f_flags &= ~O_NONBLOCK;
              spin_unlock(&file->f_lock);
              return 0;
      }
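
       /*
        * User-space sketch (illustrative only): FIONBIO predates the generic
        * O_NONBLOCK handling and has the same effect as fcntl(F_SETFL):
        *
        *        int on = 1;
        *        ioctl(fd, FIONBIO, &on);
        */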
      
      /**
       *        tiocsctty        -        set controlling tty
       *        @tty: tty structure
       *        @arg: user argument
       *
       *        This ioctl is used to manage job control. It permits a session
       *        leader to set this tty as the controlling tty for the session.
       *
       *        Locking:
       *                Takes tty_lock() to serialize proc_set_tty() for this tty
       *                Takes tasklist_lock internally to walk sessions
       *                Takes ->siglock() when updating signal->tty
       */
      
      static int tiocsctty(struct tty_struct *tty, struct file *file, int arg)
      {
               int ret = 0;
      
              tty_lock(tty);
              read_lock(&tasklist_lock);
      
              if (current->signal->leader && (task_session(current) == tty->session))
                      goto unlock;
      
               /*
                * The process must be a session leader and
                * not have a controlling tty already.
                */
              if (!current->signal->leader || current->signal->tty) {
                      ret = -EPERM;
                      goto unlock;
              }
      
              if (tty->session) {
                      /*
                       * This tty is already the controlling
                       * tty for another session group!
                       */
                      if (arg == 1 && capable(CAP_SYS_ADMIN)) {
                              /*
                               * Steal it away
                               */
                              session_clear_tty(tty->session);
                      } else {
                              ret = -EPERM;
                              goto unlock;
                      }
              }
      
              /* See the comment in tty_open(). */
              if ((file->f_mode & FMODE_READ) == 0 && !capable(CAP_SYS_ADMIN)) {
                      ret = -EPERM;
                      goto unlock;
              }
      
               proc_set_tty(tty);
      unlock:
              read_unlock(&tasklist_lock);
              tty_unlock(tty);
              return ret;
      }
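
       /*
        * User-space sketch (illustrative only): a new session leader acquires
        * a controlling terminal through the path above:
        *
        *        setsid();
        *        int fd = open("/dev/ttyS0", O_RDWR | O_NOCTTY);
        *        ioctl(fd, TIOCSCTTY, 0);
        *
        * Passing arg == 1 with CAP_SYS_ADMIN steals a tty that already
        * belongs to another session, per the session_clear_tty() branch.
        */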
      
      /**
       *        tty_get_pgrp        -        return a ref counted pgrp pid
       *        @tty: tty to read
       *
       *        Returns a refcounted instance of the pid struct for the process
       *        group controlling the tty.
       */
      
      struct pid *tty_get_pgrp(struct tty_struct *tty)
      {
              unsigned long flags;
               struct pid *pgrp;

               spin_lock_irqsave(&tty->ctrl_lock, flags);
              pgrp = get_pid(tty->pgrp);
              spin_unlock_irqrestore(&tty->ctrl_lock, flags);
      
              return pgrp;
      }
      EXPORT_SYMBOL_GPL(tty_get_pgrp);
      
      /*
       * This checks not only the pgrp, but falls back on the pid if no
       * satisfactory pgrp is found. I dunno - gdb doesn't work correctly
       * without this...
       *
       * The caller must hold rcu lock or the tasklist lock.
       */
      static struct pid *session_of_pgrp(struct pid *pgrp)
      {
              struct task_struct *p;
              struct pid *sid = NULL;
      
              p = pid_task(pgrp, PIDTYPE_PGID);
              if (p == NULL)
                      p = pid_task(pgrp, PIDTYPE_PID);
              if (p != NULL)
                      sid = task_session(p);
      
              return sid;
      }
      
      /**
       *        tiocgpgrp                -        get process group
       *        @tty: tty passed by user
       *        @real_tty: tty side of the tty passed by the user if a pty else the tty
       *        @p: returned pid
       *
       *        Obtain the process group of the tty. If there is no process group
       *        return an error.
       *
       *        Locking: none. Reference to current->signal->tty is safe.
       */
      
      static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
      {
              struct pid *pid;
              int ret;
               /*
                * (tty == real_tty) is a cheap way of
                * testing if the tty is NOT a master pty.
                */
               if (tty == real_tty && current->signal->tty != real_tty)
                      return -ENOTTY;
              pid = tty_get_pgrp(real_tty);
              ret =  put_user(pid_vnr(pid), p);
              put_pid(pid);
              return ret;
      }
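
       /*
        * User-space sketch (illustrative only): this is the kernel side of
        * tcgetpgrp(3), which wraps the TIOCGPGRP ioctl:
        *
        *        pid_t pgrp = tcgetpgrp(fd);
        */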
      
      /**
       *        tiocspgrp                -        attempt to set process group
       *        @tty: tty passed by user
       *        @real_tty: tty side device matching tty passed by user
       *        @p: pid pointer
       *
        *        Set the process group of the tty to the process group passed.
        *        Only permitted where the tty session is our session.
       *
       *        Locking: RCU, ctrl lock
       */
      
      static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
      {
               struct pid *pgrp;
               pid_t pgrp_nr;
               int retval = tty_check_change(real_tty);

               if (retval == -EIO)
                       return -ENOTTY;
               if (retval)
                      return retval;
              if (!current->signal->tty ||
                  (current->signal->tty != real_tty) ||
                  (real_tty->session != task_session(current)))
                      return -ENOTTY;
              if (get_user(pgrp_nr, p))
                      return -EFAULT;
              if (pgrp_nr < 0)
                      return -EINVAL;
              rcu_read_lock();
              pgrp = find_vpid(pgrp_nr);
              retval = -ESRCH;
              if (!pgrp)
                      goto out_unlock;
              retval = -EPERM;
              if (session_of_pgrp(pgrp) != task_session(current))
                      goto out_unlock;
              retval = 0;
              spin_lock_irq(&tty->ctrl_lock);
              put_pid(real_tty->pgrp);
              real_tty->pgrp = get_pid(pgrp);
              spin_unlock_irq(&tty->ctrl_lock);
      out_unlock:
              rcu_read_unlock();
              return retval;
      }
      
      /**
       *        tiocgsid                -        get session id
       *        @tty: tty passed by user
       *        @real_tty: tty side of the tty passed by the user if a pty else the tty
       *        @p: pointer to returned session id
       *
       *        Obtain the session id of the tty. If there is no session
       *        return an error.
       *
       *        Locking: none. Reference to current->signal->tty is safe.
       */
      
      static int tiocgsid(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
      {
               /*
                * (tty == real_tty) is a cheap way of
                * testing if the tty is NOT a master pty.
                */
               if (tty == real_tty && current->signal->tty != real_tty)
                      return -ENOTTY;
              if (!real_tty->session)
                      return -ENOTTY;
              return put_user(pid_vnr(real_tty->session), p);
      }
      
      /**
       *        tiocsetd        -        set line discipline
       *        @tty: tty device
       *        @p: pointer to user data
       *
       *        Set the line discipline according to user request.
       *
       *        Locking: see tty_set_ldisc, this function is just a helper
       */
      
      static int tiocsetd(struct tty_struct *tty, int __user *p)
      {
              int ldisc;
               int ret;

               if (get_user(ldisc, p))
                       return -EFAULT;
      
              ret = tty_set_ldisc(tty, ldisc);
      
              return ret;
      }
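
       /*
        * User-space sketch (illustrative only): attaching a line discipline,
        * much as ldattach(8) or slattach(8) do:
        *
        *        int ldisc = N_SLIP;
        *        ioctl(fd, TIOCSETD, &ldisc);
        */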
      
      /**
       *        tiocgetd        -        get line discipline
       *        @tty: tty device
       *        @p: pointer to user data
       *
       *        Retrieves the line discipline id directly from the ldisc.
       *
       *        Locking: waits for ldisc reference (in case the line discipline
       *                is changing or the tty is being hungup)
       */
      
      static int tiocgetd(struct tty_struct *tty, int __user *p)
      {
              struct tty_ldisc *ld;
               int ret;
      
              ld = tty_ldisc_ref_wait(tty);
              ret = put_user(ld->ops->num, p);
              tty_ldisc_deref(ld);
              return ret;
      }
      
      /**
        *        send_break        -        perform a timed break
        *        @tty: device to break on
        *        @duration: timeout in milliseconds
       *
       *        Perform a timed break on hardware that lacks its own driver level
       *        timed break functionality.
       *
       *        Locking:
       *                atomic_write_lock serializes
       *
       */
      
      static int send_break(struct tty_struct *tty, unsigned int duration)
      {
               int retval;

               if (tty->ops->break_ctl == NULL)
                       return 0;

               if (tty->driver->flags & TTY_DRIVER_HARDWARE_BREAK)
                      retval = tty->ops->break_ctl(tty, duration);
              else {
                      /* Do the work ourselves */
                      if (tty_write_lock(tty, 0) < 0)
                              return -EINTR;
                      retval = tty->ops->break_ctl(tty, -1);
                      if (retval)
                              goto out;
                      if (!signal_pending(current))
                              msleep_interruptible(duration);
                      retval = tty->ops->break_ctl(tty, 0);
      out:
                      tty_write_unlock(tty);
                      if (signal_pending(current))
                              retval = -EINTR;
              }
              return retval;
      }
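
       /*
        * User-space sketch (illustrative only): this helper backs
        * tcsendbreak(3), which reaches it via the TCSBRK/TCSBRKP cases in
        * tty_ioctl() below:
        *
        *        tcsendbreak(fd, 0);
        *
        * A zero argument yields the traditional ~250ms break.
        */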
      
      /**
       *        tty_tiocmget                -        get modem status
       *        @tty: tty device
       *        @file: user file pointer
       *        @p: pointer to result
       *
       *        Obtain the modem status bits from the tty driver if the feature
       *        is supported. Return -EINVAL if it is not available.
       *
       *        Locking: none (up to the driver)
       */
      
      static int tty_tiocmget(struct tty_struct *tty, int __user *p)
      {
               int retval = -EINVAL;
      
              if (tty->ops->tiocmget) {
                      retval = tty->ops->tiocmget(tty);
      
                      if (retval >= 0)
                              retval = put_user(retval, p);
              }
              return retval;
      }
      
      /**
       *        tty_tiocmset                -        set modem status
       *        @tty: tty device
       *        @cmd: command - clear bits, set bits or set all
       *        @p: pointer to desired bits
       *
       *        Set the modem status bits from the tty driver if the feature
       *        is supported. Return -EINVAL if it is not available.
       *
       *        Locking: none (up to the driver)
       */
      
      static int tty_tiocmset(struct tty_struct *tty, unsigned int cmd,
                   unsigned __user *p)
      {
              int retval;
               unsigned int set, clear, val;
      
              if (tty->ops->tiocmset == NULL)
                      return -EINVAL;
      
              retval = get_user(val, p);
              if (retval)
                      return retval;
              set = clear = 0;
              switch (cmd) {
              case TIOCMBIS:
                      set = val;
                      break;
              case TIOCMBIC:
                      clear = val;
                      break;
              case TIOCMSET:
                      set = val;
                      clear = ~val;
                      break;
              }
              set &= TIOCM_DTR|TIOCM_RTS|TIOCM_OUT1|TIOCM_OUT2|TIOCM_LOOP;
              clear &= TIOCM_DTR|TIOCM_RTS|TIOCM_OUT1|TIOCM_OUT2|TIOCM_LOOP;
              return tty->ops->tiocmset(tty, set, clear);
      }
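
       /*
        * User-space sketch (illustrative only): asserting a modem line via
        * the TIOCMBIS ioctl dispatched to this helper:
        *
        *        int bits = TIOCM_RTS;
        *        ioctl(fd, TIOCMBIS, &bits);
        */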
      
      static int tty_tiocgicount(struct tty_struct *tty, void __user *arg)
      {
               int retval = -EINVAL;
              struct serial_icounter_struct icount;
              memset(&icount, 0, sizeof(icount));
              if (tty->ops->get_icount)
                      retval = tty->ops->get_icount(tty, &icount);
              if (retval != 0)
                      return retval;
              if (copy_to_user(arg, &icount, sizeof(icount)))
                      return -EFAULT;
              return 0;
      }
      
      static void tty_warn_deprecated_flags(struct serial_struct __user *ss)
      {
              static DEFINE_RATELIMIT_STATE(depr_flags,
                              DEFAULT_RATELIMIT_INTERVAL,
                              DEFAULT_RATELIMIT_BURST);
              char comm[TASK_COMM_LEN];
               int flags;

               if (get_user(flags, &ss->flags))
                       return;

               flags &= ASYNC_DEPRECATED;

               if (flags && __ratelimit(&depr_flags))
                      pr_warning("%s: '%s' is using deprecated serial flags (with no effect): %.8x\n",
                                      __func__, get_task_comm(comm, current), flags);
      }
      
      /*
       * if pty, return the slave side (real_tty)
       * otherwise, return self
       */
       static struct tty_struct *tty_pair_get_tty(struct tty_struct *tty)
       {
               if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
                  tty->driver->subtype == PTY_TYPE_MASTER)
                      tty = tty->link;
              return tty;
      }
      
      /*
       * Split this up, as gcc can choke on it otherwise..
       */
       long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
      {
              struct tty_struct *tty = file_tty(file);
              struct tty_struct *real_tty;
              void __user *p = (void __user *)arg;
              int retval;
              struct tty_ldisc *ld;
      
              if (tty_paranoia_check(tty, file_inode(file), "tty_ioctl"))
                       return -EINVAL;
      
              real_tty = tty_pair_get_tty(tty);
      
               /*
                * Factor out some common prep work
                */
              switch (cmd) {
              case TIOCSETD:
              case TIOCSBRK:
              case TIOCCBRK:
               case TCSBRK:
               case TCSBRKP:
                       retval = tty_check_change(tty);
                       if (retval)
                               return retval;
                      if (cmd != TIOCCBRK) {
                              tty_wait_until_sent(tty, 0);
                              if (signal_pending(current))
                                      return -EINTR;
                      }
                      break;
              }
      
               /*
                *        Now do the stuff.
                */
               switch (cmd) {
               case TIOCSTI:
                       return tiocsti(tty, p);
               case TIOCGWINSZ:
                       return tiocgwinsz(real_tty, p);
               case TIOCSWINSZ:
                       return tiocswinsz(real_tty, p);
               case TIOCCONS:
                       return real_tty != tty ? -EINVAL : tioccons(file);
               case FIONBIO:
                       return fionbio(file, p);
               case TIOCEXCL:
                       set_bit(TTY_EXCLUSIVE, &tty->flags);
                       return 0;
               case TIOCNXCL:
                       clear_bit(TTY_EXCLUSIVE, &tty->flags);
                       return 0;
               case TIOCGEXCL:
               {
                       int excl = test_bit(TTY_EXCLUSIVE, &tty->flags);
                       return put_user(excl, (int __user *)p);
               }
               case TIOCNOTTY:
                       if (current->signal->tty != tty)
                               return -ENOTTY;
                       no_tty();
                       return 0;
               case TIOCSCTTY:
                       return tiocsctty(tty, file, arg);
               case TIOCGPGRP:
                       return tiocgpgrp(tty, real_tty, p);
               case TIOCSPGRP:
                       return tiocspgrp(tty, real_tty, p);
               case TIOCGSID:
                       return tiocgsid(tty, real_tty, p);
               case TIOCGETD:
                       return tiocgetd(tty, p);
               case TIOCSETD:
                       return tiocsetd(tty, p);
               case TIOCVHANGUP:
                       if (!capable(CAP_SYS_ADMIN))
                               return -EPERM;
                       tty_vhangup(tty);
                       return 0;
               case TIOCGDEV:
               {
                       unsigned int ret = new_encode_dev(tty_devnum(real_tty));
                       return put_user(ret, (unsigned int __user *)p);
               }
               /*
                * Break handling
                */
               case TIOCSBRK:        /* Turn break on, unconditionally */
                       if (tty->ops->break_ctl)
                               return tty->ops->break_ctl(tty, -1);
                       return 0;
               case TIOCCBRK:        /* Turn break off, unconditionally */
                       if (tty->ops->break_ctl)
                               return tty->ops->break_ctl(tty, 0);
                       return 0;
               case TCSBRK:   /* SVID version: non-zero arg --> no break */
                       /* non-zero arg means wait for all output data
                        * to be sent (performed above) but don't send break.
                        * This is used by the tcdrain() termios function.
                        */
                       if (!arg)
                               return send_break(tty, 250);
                       return 0;
               case TCSBRKP:        /* support for POSIX tcsendbreak() */
                       return send_break(tty, arg ? arg*100 : 250);

               case TIOCMGET:
                       return tty_tiocmget(tty, p);
               case TIOCMSET:
               case TIOCMBIC:
               case TIOCMBIS:
                       return tty_tiocmset(tty, cmd, p);
               case TIOCGICOUNT:
                       retval = tty_tiocgicount(tty, p);
                       /* For the moment allow fall through to the old method */
                       if (retval != -EINVAL)
                               return retval;
                       break;
               case TCFLSH:
                       switch (arg) {
                       case TCIFLUSH:
                       case TCIOFLUSH:
                       /* flush tty buffer and allow ldisc to process ioctl */
                               tty_buffer_flush(tty, NULL);
                               break;
                       }
                       break;
               case TIOCSSERIAL:
                       tty_warn_deprecated_flags(p);
                       break;
               }
               if (tty->ops->ioctl) {
                       retval = tty->ops->ioctl(tty, cmd, arg);
                       if (retval != -ENOIOCTLCMD)
                               return retval;
               }
               ld = tty_ldisc_ref_wait(tty);
               retval = -EINVAL;
               if (ld->ops->ioctl) {
                       retval = ld->ops->ioctl(tty, file, cmd, arg);
                       if (retval == -ENOIOCTLCMD)
                               retval = -ENOTTY;
               }
              tty_ldisc_deref(ld);
              return retval;
      }
      
      #ifdef CONFIG_COMPAT
      static long tty_compat_ioctl(struct file *file, unsigned int cmd,
                                      unsigned long arg)
      {
              struct tty_struct *tty = file_tty(file);
              struct tty_ldisc *ld;
              int retval = -ENOIOCTLCMD;
      
              if (tty_paranoia_check(tty, file_inode(file), "tty_ioctl"))
                      return -EINVAL;
      
              if (tty->ops->compat_ioctl) {
                      retval = tty->ops->compat_ioctl(tty, cmd, arg);
                      if (retval != -ENOIOCTLCMD)
                              return retval;
              }
      
              ld = tty_ldisc_ref_wait(tty);
              if (ld->ops->compat_ioctl)
                      retval = ld->ops->compat_ioctl(tty, file, cmd, arg);
              else
                      retval = n_tty_compat_ioctl_helper(tty, file, cmd, arg);
              tty_ldisc_deref(ld);
      
              return retval;
      }
      #endif
      
      static int this_tty(const void *t, struct file *file, unsigned fd)
      {
              if (likely(file->f_op->read != tty_read))
                      return 0;
              return file_tty(file) != t ? 0 : fd + 1;
      }
              
      /*
       * This implements the "Secure Attention Key" ---  the idea is to
       * prevent trojan horses by killing all processes associated with this
       * tty when the user hits the "Secure Attention Key".  Required for
       * super-paranoid applications --- see the Orange Book for more details.
       *
       * This code could be nicer; ideally it should send a HUP, wait a few
        * seconds, then send an INT, and then a KILL signal.  But you then
       * have to coordinate with the init process, since all processes associated
       * with the current tty must be dead before the new getty is allowed
       * to spawn.
       *
        * Now, if only it were correct ;-/ The current code has a nasty hole -
       * it doesn't catch files in flight. We may send the descriptor to ourselves
       * via AF_UNIX socket, close it and later fetch from socket. FIXME.
       *
       * Nasty bug: do_SAK is being called in interrupt context.  This can
       * deadlock.  We punt it up to process context.  AKPM - 16Mar2001
       */
      void __do_SAK(struct tty_struct *tty)
      {
      #ifdef TTY_SOFT_SAK
              tty_hangup(tty);
      #else
              struct task_struct *g, *p;
              struct pid *session;
              int                i;
      
              if (!tty)
                      return;
              session = tty->session;
      
              tty_ldisc_flush(tty);
      
              tty_driver_flush_buffer(tty);
      
              read_lock(&tasklist_lock);
              /* Kill the entire session */
              do_each_pid_task(session, PIDTYPE_SID, p) {
                      printk(KERN_NOTICE "SAK: killed process %d"
                              " (%s): task_session(p)==tty->session\n",
                              task_pid_nr(p), p->comm);
                      send_sig(SIGKILL, p, 1);
              } while_each_pid_task(session, PIDTYPE_SID, p);
              /* Now kill any processes that happen to have the
               * tty open.
               */
              do_each_thread(g, p) {
                      if (p->signal->tty == tty) {
                              printk(KERN_NOTICE "SAK: killed process %d"
                                  " (%s): task_session(p)==tty->session\n",
                                  task_pid_nr(p), p->comm);
                              send_sig(SIGKILL, p, 1);
                              continue;
                      }
                      task_lock(p);
                      i = iterate_fd(p->files, 0, this_tty, tty);
                      if (i != 0) {
                              printk(KERN_NOTICE "SAK: killed process %d"
                                  " (%s): fd#%d opened to the tty\n",
                                          task_pid_nr(p), p->comm, i - 1);
                              force_sig(SIGKILL, p);
                      }
                      task_unlock(p);
              } while_each_thread(g, p);
              read_unlock(&tasklist_lock);
      #endif
      }
      
      static void do_SAK_work(struct work_struct *work)
      {
              struct tty_struct *tty =
                      container_of(work, struct tty_struct, SAK_work);
              __do_SAK(tty);
      }
      
      /*
       * The tq handling here is a little racy - tty->SAK_work may already be queued.
       * Fortunately we don't need to worry, because if ->SAK_work is already queued,
       * the values which we write to it will be identical to the values which it
       * already has. --akpm
       */
      void do_SAK(struct tty_struct *tty)
      {
              if (!tty)
                      return;
              schedule_work(&tty->SAK_work);
      }
      
      EXPORT_SYMBOL(do_SAK);
      
      static int dev_match_devt(struct device *dev, const void *data)
       {
              const dev_t *devt = data;
              return dev->devt == *devt;
      }
      
       /* Callers must put_device() the returned device when done with it! */
      static struct device *tty_get_device(struct tty_struct *tty)
      {
              dev_t devt = tty_devnum(tty);
              return class_find_device(tty_class, NULL, &devt, dev_match_devt);
      }
      
      
      /**
        *        alloc_tty_struct        -        allocate and initialize a tty structure
        *        @driver: tty driver the tty will be attached to
        *        @idx: line index within @driver
        *
        *        This subroutine allocates and initializes a tty structure.
       *
       *        Locking: none - tty in question is not exposed at this point
       */
      
      struct tty_struct *alloc_tty_struct(struct tty_driver *driver, int idx)
      {
               struct tty_struct *tty;

               tty = kzalloc(sizeof(*tty), GFP_KERNEL);
               if (!tty)
                       return NULL;

               kref_init(&tty->kref);
               tty->magic = TTY_MAGIC;
               if (tty_ldisc_init(tty)) {
                       kfree(tty);
                       return NULL;
               }
              tty->session = NULL;
              tty->pgrp = NULL;
              mutex_init(&tty->legacy_mutex);
              mutex_init(&tty->throttle_mutex);
              init_rwsem(&tty->termios_rwsem);
              mutex_init(&tty->winsize_mutex);
              init_ldsem(&tty->ldisc_sem);
              init_waitqueue_head(&tty->write_wait);
              init_waitqueue_head(&tty->read_wait);
              INIT_WORK(&tty->hangup_work, do_tty_hangup);
              mutex_init(&tty->atomic_write_lock);
              spin_lock_init(&tty->ctrl_lock);
              spin_lock_init(&tty->flow_lock);
              INIT_LIST_HEAD(&tty->tty_files);
              INIT_WORK(&tty->SAK_work, do_SAK_work);
      
              tty->driver = driver;
              tty->ops = driver->ops;
              tty->index = idx;
              tty_line_name(driver, idx, tty->name);
               tty->dev = tty_get_device(tty);
      
              return tty;
      }
      
      /**
       *        deinitialize_tty_struct
       *        @tty: tty to deinitialize
       *
 *        This subroutine deinitializes a tty structure that has been newly
 *        allocated but on which tty_release() cannot yet be called.
       *
       *        Locking: none - tty in question must not be exposed at this point
       */
      void deinitialize_tty_struct(struct tty_struct *tty)
      {
              tty_ldisc_deinit(tty);
      }
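
/*
 * Illustrative sketch, not part of the original file: a hypothetical setup
 * path pairing alloc_tty_struct() with deinitialize_tty_struct() on
 * failure.  tty_setup_hw_example() is an invented helper; real callers
 * (see tty_init_dev()) perform additional teardown.
 */
#if 0
static struct tty_struct *tty_setup_example(struct tty_driver *driver, int idx)
{
        struct tty_struct *tty = alloc_tty_struct(driver, idx);

        if (!tty)
                return ERR_PTR(-ENOMEM);

        if (tty_setup_hw_example(tty)) {
                deinitialize_tty_struct(tty);   /* undo tty_ldisc_init() */
                put_device(tty->dev);           /* drop tty_get_device() ref */
                kfree(tty);
                return ERR_PTR(-ENODEV);
        }
        return tty;
}
#endif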
      
      /**
       *        tty_put_char        -        write one character to a tty
       *        @tty: tty
       *        @ch: character
       *
       *        Write one byte to the tty using the provided put_char method
       *        if present. Returns the number of characters successfully output.
       *
       *        Note: the specific put_char operation in the driver layer may go
       *        away soon. Don't call it directly, use this method
       */
      
int tty_put_char(struct tty_struct *tty, unsigned char ch)
{
        if (tty->ops->put_char)
                      return tty->ops->put_char(tty, ch);
              return tty->ops->write(tty, &ch, 1);
      }
      EXPORT_SYMBOL_GPL(tty_put_char);
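
/*
 * Illustrative sketch, not part of the original file: a line discipline or
 * helper can emit a single byte through tty_put_char() and let it fall back
 * to ->write() transparently; echo_char_example() is an invented name.
 */
#if 0
static void echo_char_example(struct tty_struct *tty, unsigned char ch)
{
        if (tty_put_char(tty, ch) != 1)
                pr_debug("%s: could not echo %#x\n", tty->name, ch);
}
#endif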
      
      struct class *tty_class;
      
      static int tty_cdev_add(struct tty_driver *driver, dev_t dev,
                      unsigned int index, unsigned int count)
      {
              int err;
      
              /* init here, since reused cdevs cause crashes */
              driver->cdevs[index] = cdev_alloc();
              if (!driver->cdevs[index])
                      return -ENOMEM;
              driver->cdevs[index]->ops = &tty_fops;
              driver->cdevs[index]->owner = driver->owner;
              err = cdev_add(driver->cdevs[index], dev, count);
              if (err)
                      kobject_put(&driver->cdevs[index]->kobj);
              return err;
      }
      
      /**
       *        tty_register_device - register a tty device
       *        @driver: the tty driver that describes the tty device
       *        @index: the index in the tty driver for this tty device
       *        @device: a struct device that is associated with this tty device.
       *                This field is optional, if there is no known struct device
       *                for this tty device it can be set to NULL safely.
       *
       *        Returns a pointer to the struct device for this tty device
       *        (or ERR_PTR(-EFOO) on error).
       *
       *        This call is required to be made to register an individual tty device
       *        if the tty driver's flags have the TTY_DRIVER_DYNAMIC_DEV bit set.  If
       *        that bit is not set, this function should not be called by a tty
       *        driver.
       *
       *        Locking: ??
       */
      
      struct device *tty_register_device(struct tty_driver *driver, unsigned index,
                                         struct device *device)
      {
              return tty_register_device_attr(driver, index, device, NULL, NULL);
      }
      EXPORT_SYMBOL(tty_register_device);
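
/*
 * Illustrative sketch, not part of the original file: a hotplug driver that
 * set TTY_DRIVER_DYNAMIC_DEV registers each line by hand when the hardware
 * appears and unregisters it on removal.  example_driver, example_attach()
 * and example_detach() are invented names.
 */
#if 0
static int example_attach(struct device *parent, unsigned index)
{
        struct device *dev;

        dev = tty_register_device(example_driver, index, parent);
        return IS_ERR(dev) ? PTR_ERR(dev) : 0;
}

static void example_detach(unsigned index)
{
        tty_unregister_device(example_driver, index);
}
#endif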
      
      static void tty_device_create_release(struct device *dev)
      {
              pr_debug("device: '%s': %s\n", dev_name(dev), __func__);
              kfree(dev);
      }
      
      /**
       *        tty_register_device_attr - register a tty device
       *        @driver: the tty driver that describes the tty device
       *        @index: the index in the tty driver for this tty device
       *        @device: a struct device that is associated with this tty device.
       *                This field is optional, if there is no known struct device
       *                for this tty device it can be set to NULL safely.
       *        @drvdata: Driver data to be set to device.
       *        @attr_grp: Attribute group to be set on device.
       *
       *        Returns a pointer to the struct device for this tty device
       *        (or ERR_PTR(-EFOO) on error).
       *
       *        This call is required to be made to register an individual tty device
       *        if the tty driver's flags have the TTY_DRIVER_DYNAMIC_DEV bit set.  If
       *        that bit is not set, this function should not be called by a tty
       *        driver.
       *
       *        Locking: ??
       */
      struct device *tty_register_device_attr(struct tty_driver *driver,
                                         unsigned index, struct device *device,
                                         void *drvdata,
                                         const struct attribute_group **attr_grp)
      {
              char name[64];
              dev_t devt = MKDEV(driver->major, driver->minor_start) + index;
              struct device *dev = NULL;
              int retval = -ENODEV;
              bool cdev = false;
      
              if (index >= driver->num) {
                printk(KERN_ERR "Attempt to register invalid tty line number "
                       "(%d).\n", index);
                      return ERR_PTR(-EINVAL);
              }
      
              if (driver->type == TTY_DRIVER_TYPE_PTY)
                      pty_line_name(driver, index, name);
              else
                      tty_line_name(driver, index, name);
      
              if (!(driver->flags & TTY_DRIVER_DYNAMIC_ALLOC)) {
                      retval = tty_cdev_add(driver, devt, index, 1);
                      if (retval)
                              goto error;
                      cdev = true;
              }
      
              dev = kzalloc(sizeof(*dev), GFP_KERNEL);
              if (!dev) {
                      retval = -ENOMEM;
                      goto error;
              }
      
              dev->devt = devt;
              dev->class = tty_class;
              dev->parent = device;
              dev->release = tty_device_create_release;
              dev_set_name(dev, "%s", name);
              dev->groups = attr_grp;
              dev_set_drvdata(dev, drvdata);
      
              retval = device_register(dev);
              if (retval)
                      goto error;
      
              return dev;
      
      error:
              put_device(dev);
              if (cdev) {
                      cdev_del(driver->cdevs[index]);
                      driver->cdevs[index] = NULL;
              }
              return ERR_PTR(retval);
      }
      EXPORT_SYMBOL_GPL(tty_register_device_attr);
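
/*
 * Illustrative sketch, not part of the original file: the _attr variant
 * additionally attaches driver data and sysfs attribute groups at
 * registration time.  example_driver, example_port and example_groups are
 * invented names.
 */
#if 0
static int example_attach_attr(struct device *parent, unsigned index)
{
        struct device *dev;

        dev = tty_register_device_attr(example_driver, index, parent,
                                       example_port, example_groups);
        return IS_ERR(dev) ? PTR_ERR(dev) : 0;
}
#endif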
      
      /**
 *        tty_unregister_device - unregister a tty device
 *        @driver: the tty driver that describes the tty device
 *        @index: the index in the tty driver for this tty device
 *
 *        If a tty device is registered with a call to tty_register_device()
 *        then this function must be called when the tty device is gone.
       *
       *        Locking: ??
       */
      
      void tty_unregister_device(struct tty_driver *driver, unsigned index)
      {
              device_destroy(tty_class,
                      MKDEV(driver->major, driver->minor_start) + index);
              if (!(driver->flags & TTY_DRIVER_DYNAMIC_ALLOC)) {
                      cdev_del(driver->cdevs[index]);
                      driver->cdevs[index] = NULL;
              }
      }
      EXPORT_SYMBOL(tty_unregister_device);
      
      /**
       * __tty_alloc_driver -- allocate tty driver
       * @lines: count of lines this driver can handle at most
 * @owner: module which is responsible for this driver
       * @flags: some of TTY_DRIVER_* flags, will be set in driver->flags
       *
 * This should not be called directly; use one of the provided wrapper
 * macros (e.g. tty_alloc_driver()) instead.  Check the returned pointer
 * with IS_ERR() and friends.
       */
      struct tty_driver *__tty_alloc_driver(unsigned int lines, struct module *owner,
                      unsigned long flags)
      {
              struct tty_driver *driver;
              unsigned int cdevs = 1;
              int err;
      
              if (!lines || (flags & TTY_DRIVER_UNNUMBERED_NODE && lines > 1))
                      return ERR_PTR(-EINVAL);
      
              driver = kzalloc(sizeof(struct tty_driver), GFP_KERNEL);
              if (!driver)
                      return ERR_PTR(-ENOMEM);
      
              kref_init(&driver->kref);
              driver->magic = TTY_DRIVER_MAGIC;
              driver->num = lines;
              driver->owner = owner;
              driver->flags = flags;
      
              if (!(flags & TTY_DRIVER_DEVPTS_MEM)) {
                      driver->ttys = kcalloc(lines, sizeof(*driver->ttys),
                                      GFP_KERNEL);
                      driver->termios = kcalloc(lines, sizeof(*driver->termios),
                                      GFP_KERNEL);
                      if (!driver->ttys || !driver->termios) {
                              err = -ENOMEM;
                              goto err_free_all;
                      }
              }
      
              if (!(flags & TTY_DRIVER_DYNAMIC_ALLOC)) {
                      driver->ports = kcalloc(lines, sizeof(*driver->ports),
                                      GFP_KERNEL);
                      if (!driver->ports) {
                              err = -ENOMEM;
                              goto err_free_all;
                      }
                      cdevs = lines;
              }
      
              driver->cdevs = kcalloc(cdevs, sizeof(*driver->cdevs), GFP_KERNEL);
              if (!driver->cdevs) {
                      err = -ENOMEM;
                      goto err_free_all;
              }
      
              return driver;
      err_free_all:
              kfree(driver->ports);
              kfree(driver->ttys);
              kfree(driver->termios);
              kfree(driver->cdevs);
              kfree(driver);
              return ERR_PTR(err);
      }
      EXPORT_SYMBOL(__tty_alloc_driver);
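
/*
 * Illustrative sketch, not part of the original file: callers normally go
 * through a wrapper such as tty_alloc_driver() and must check the result
 * with IS_ERR(); alloc_example() is an invented name.
 */
#if 0
static struct tty_driver *alloc_example(void)
{
        struct tty_driver *driver;

        driver = tty_alloc_driver(4, TTY_DRIVER_REAL_RAW |
                                     TTY_DRIVER_DYNAMIC_DEV);
        if (IS_ERR(driver))
                return NULL;    /* or propagate PTR_ERR(driver) */
        return driver;
}
#endif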
      
      static void destruct_tty_driver(struct kref *kref)
      {
              struct tty_driver *driver = container_of(kref, struct tty_driver, kref);
              int i;
              struct ktermios *tp;
      
              if (driver->flags & TTY_DRIVER_INSTALLED) {
                      /*
                       * Free the termios and termios_locked structures because
                       * we don't want to get memory leaks when modular tty
                       * drivers are removed from the kernel.
                       */
                      for (i = 0; i < driver->num; i++) {
                              tp = driver->termios[i];
                              if (tp) {
                                      driver->termios[i] = NULL;
                                      kfree(tp);
                              }
                              if (!(driver->flags & TTY_DRIVER_DYNAMIC_DEV))
                                      tty_unregister_device(driver, i);
                      }
                      proc_tty_unregister_driver(driver);
                      if (driver->flags & TTY_DRIVER_DYNAMIC_ALLOC)
                              cdev_del(driver->cdevs[0]);
              }
              kfree(driver->cdevs);
              kfree(driver->ports);
              kfree(driver->termios);
              kfree(driver->ttys);
              kfree(driver);
      }
      
void tty_driver_kref_put(struct tty_driver *driver)
{
              kref_put(&driver->kref, destruct_tty_driver);
      }
      EXPORT_SYMBOL(tty_driver_kref_put);
      
      void tty_set_operations(struct tty_driver *driver,
                              const struct tty_operations *op)
      {
              driver->ops = op;
}
      EXPORT_SYMBOL(tty_set_operations);
      
      void put_tty_driver(struct tty_driver *d)
      {
              tty_driver_kref_put(d);
      }
      EXPORT_SYMBOL(put_tty_driver);
      
      /*
       * Called by a tty driver to register itself.
       */
      int tty_register_driver(struct tty_driver *driver)
      {
              int error;
              int i;
              dev_t dev;
              struct device *d;
      
              if (!driver->major) {
                      error = alloc_chrdev_region(&dev, driver->minor_start,
                                                      driver->num, driver->name);
                      if (!error) {
                              driver->major = MAJOR(dev);
                              driver->minor_start = MINOR(dev);
                      }
              } else {
                      dev = MKDEV(driver->major, driver->minor_start);
                      error = register_chrdev_region(dev, driver->num, driver->name);
              }
              if (error < 0)
                      goto err;
      
              if (driver->flags & TTY_DRIVER_DYNAMIC_ALLOC) {
                      error = tty_cdev_add(driver, dev, 0, driver->num);
                      if (error)
                              goto err_unreg_char;
              }
      
              mutex_lock(&tty_mutex);
              list_add(&driver->tty_drivers, &tty_drivers);
              mutex_unlock(&tty_mutex);
      
              if (!(driver->flags & TTY_DRIVER_DYNAMIC_DEV)) {
                      for (i = 0; i < driver->num; i++) {
                              d = tty_register_device(driver, i, NULL);
                              if (IS_ERR(d)) {
                                      error = PTR_ERR(d);
                                      goto err_unreg_devs;
                              }
                      }
              }
              proc_tty_register_driver(driver);
              driver->flags |= TTY_DRIVER_INSTALLED;
              return 0;
      
      err_unreg_devs:
              for (i--; i >= 0; i--)
                      tty_unregister_device(driver, i);
      
              mutex_lock(&tty_mutex);
              list_del(&driver->tty_drivers);
              mutex_unlock(&tty_mutex);
      
      err_unreg_char:
              unregister_chrdev_region(dev, driver->num);
      err:
              return error;
      }
      EXPORT_SYMBOL(tty_register_driver);
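
/*
 * Illustrative sketch, not part of the original file: the usual
 * registration sequence built on the helpers above.  example_ops and the
 * "ttyEX" name space are invented; major 0 requests dynamic allocation of
 * a device number in tty_register_driver().
 */
#if 0
static struct tty_driver *example_driver;

static int __init example_tty_init(void)
{
        int ret;

        example_driver = tty_alloc_driver(4, TTY_DRIVER_REAL_RAW);
        if (IS_ERR(example_driver))
                return PTR_ERR(example_driver);

        example_driver->driver_name = "example";
        example_driver->name = "ttyEX";
        example_driver->major = 0;              /* dynamic major */
        example_driver->type = TTY_DRIVER_TYPE_SERIAL;
        example_driver->subtype = SERIAL_TYPE_NORMAL;
        example_driver->init_termios = tty_std_termios;
        tty_set_operations(example_driver, &example_ops);

        ret = tty_register_driver(example_driver);
        if (ret)
                put_tty_driver(example_driver); /* drops the kref */
        return ret;
}
#endif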
      
      /*
       * Called by a tty driver to unregister itself.
       */
      int tty_unregister_driver(struct tty_driver *driver)
      {
      #if 0
              /* FIXME */
              if (driver->refcount)
                      return -EBUSY;
      #endif
              unregister_chrdev_region(MKDEV(driver->major, driver->minor_start),
                                      driver->num);
              mutex_lock(&tty_mutex);
              list_del(&driver->tty_drivers);
              mutex_unlock(&tty_mutex);
              return 0;
      }
      
      EXPORT_SYMBOL(tty_unregister_driver);
      
dev_t tty_devnum(struct tty_struct *tty)
      {
              return MKDEV(tty->driver->major, tty->driver->minor_start) + tty->index;
      }
      EXPORT_SYMBOL(tty_devnum);
      
      void tty_default_fops(struct file_operations *fops)
      {
              *fops = tty_fops;
      }
      
      /*
       * Initialize the console device. This is called *early*, so
       * we can't necessarily depend on lots of kernel help here.
       * Just do some early initializations, and do the complex setup
       * later.
       */
      void __init console_init(void)
      {
              initcall_t *call;
      
              /* Setup the default TTY line discipline. */
              tty_ldisc_begin();
      
              /*
               * set up the console device so that later boot sequences can
               * inform about problems etc..
               */
              call = __con_initcall_start;
              while (call < __con_initcall_end) {
                      (*call)();
                      call++;
              }
      }
      
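/*
 * Class ->devnode() hook, consulted by devtmpfs when it creates the nodes:
 * /dev/tty (TTYAUX_MAJOR, 0) and /dev/ptmx (TTYAUX_MAJOR, 2) must be usable
 * by every process, so they are created mode 0666; all other tty nodes keep
 * the default mode.
 */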
      static char *tty_devnode(struct device *dev, umode_t *mode)
      {
              if (!mode)
                      return NULL;
              if (dev->devt == MKDEV(TTYAUX_MAJOR, 0) ||
                  dev->devt == MKDEV(TTYAUX_MAJOR, 2))
                      *mode = 0666;
              return NULL;
      }
      
      static int __init tty_class_init(void)
      {
              tty_class = class_create(THIS_MODULE, "tty");
              if (IS_ERR(tty_class))
                      return PTR_ERR(tty_class);
              tty_class->devnode = tty_devnode;
              return 0;
      }
      
      postcore_initcall(tty_class_init);
      
      /* 3/2004 jmc: why do these devices exist? */
      static struct cdev tty_cdev, console_cdev;
      
      static ssize_t show_cons_active(struct device *dev,
                                      struct device_attribute *attr, char *buf)
      {
              struct console *cs[16];
              int i = 0;
              struct console *c;
              ssize_t count = 0;
      
              console_lock();
              for_each_console(c) {
                      if (!c->device)
                              continue;
                      if (!c->write)
                              continue;
                      if ((c->flags & CON_ENABLED) == 0)
                              continue;
                      cs[i++] = c;
                      if (i >= ARRAY_SIZE(cs))
                              break;
              }
              while (i--) {
                      int index = cs[i]->index;
                      struct tty_driver *drv = cs[i]->device(cs[i], &index);
      
                      /* don't resolve tty0 as some programs depend on it */
                      if (drv && (cs[i]->index > 0 || drv->major != TTY_MAJOR))
                              count += tty_line_name(drv, index, buf + count);
                      else
                              count += sprintf(buf + count, "%s%d",
                                               cs[i]->name, cs[i]->index);
      
                      count += sprintf(buf + count, "%c", i ? ' ':'\n');
              }
              console_unlock();
      
              return count;
      }
      static DEVICE_ATTR(active, S_IRUGO, show_cons_active, NULL);
      
      static struct attribute *cons_dev_attrs[] = {
              &dev_attr_active.attr,
              NULL
      };
      
      ATTRIBUTE_GROUPS(cons_dev);
      
      static struct device *consdev;
      
      void console_sysfs_notify(void)
      {
              if (consdev)
                      sysfs_notify(&consdev->kobj, NULL, "active");
      }
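
/*
 * Illustrative sketch, not part of the original file: userspace can wait
 * for the changes signalled here by poll()ing the "active" attribute
 * (conventionally /sys/class/tty/console/active) for POLLPRI and
 * re-reading it after each wakeup.  This is userspace code and would need
 * <fcntl.h>, <poll.h> and <unistd.h>.
 */
#if 0
static void watch_active_console(void)
{
        int fd = open("/sys/class/tty/console/active", O_RDONLY);
        char buf[64];
        struct pollfd pfd = { .fd = fd, .events = POLLPRI };

        read(fd, buf, sizeof(buf));             /* initial contents */
        while (poll(&pfd, 1, -1) > 0) {
                lseek(fd, 0, SEEK_SET);
                read(fd, buf, sizeof(buf));     /* updated contents */
        }
}
#endif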
      
      /*
       * Ok, now we can initialize the rest of the tty devices and can count
       * on memory allocations, interrupts etc..
       */
      int __init tty_init(void)
      {
              tty_sysctl_init();
              cdev_init(&tty_cdev, &tty_fops);
              if (cdev_add(&tty_cdev, MKDEV(TTYAUX_MAJOR, 0), 1) ||
                  register_chrdev_region(MKDEV(TTYAUX_MAJOR, 0), 1, "/dev/tty") < 0)
                      panic("Couldn't register /dev/tty driver\n");
              device_create(tty_class, NULL, MKDEV(TTYAUX_MAJOR, 0), NULL, "tty");
      
              cdev_init(&console_cdev, &console_fops);
              if (cdev_add(&console_cdev, MKDEV(TTYAUX_MAJOR, 1), 1) ||
                  register_chrdev_region(MKDEV(TTYAUX_MAJOR, 1), 1, "/dev/console") < 0)
                      panic("Couldn't register /dev/console driver\n");
              consdev = device_create_with_groups(tty_class, NULL,
                                                  MKDEV(TTYAUX_MAJOR, 1), NULL,
                                                  cons_dev_groups, "console");
              if (IS_ERR(consdev))
                      consdev = NULL;
      
      #ifdef CONFIG_VT
              vty_init(&console_fops);
      #endif
              return 0;
      }
      
      /*
       *        NET3:        Implementation of the ICMP protocol layer.
       *
       *                Alan Cox, <alan@lxorguk.ukuu.org.uk>
       *
       *        This program is free software; you can redistribute it and/or
       *        modify it under the terms of the GNU General Public License
       *        as published by the Free Software Foundation; either version
       *        2 of the License, or (at your option) any later version.
       *
       *        Some of the function names and the icmp unreach table for this
       *        module were derived from [icmp.c 1.0.11 06/02/93] by
       *        Ross Biro, Fred N. van Kempen, Mark Evans, Alan Cox, Gerhard Koerting.
       *        Other than that this module is a complete rewrite.
       *
       *        Fixes:
       *        Clemens Fruhwirth        :        introduce global icmp rate limiting
       *                                        with icmp type masking ability instead
       *                                        of broken per type icmp timeouts.
       *                Mike Shaver        :        RFC1122 checks.
       *                Alan Cox        :        Multicast ping reply as self.
       *                Alan Cox        :        Fix atomicity lockup in ip_build_xmit
       *                                        call.
       *                Alan Cox        :        Added 216,128 byte paths to the MTU
       *                                        code.
       *                Martin Mares        :        RFC1812 checks.
       *                Martin Mares        :        Can be configured to follow redirects
       *                                        if acting as a router _without_ a
       *                                        routing protocol (RFC 1812).
       *                Martin Mares        :        Echo requests may be configured to
       *                                        be ignored (RFC 1812).
       *                Martin Mares        :        Limitation of ICMP error message
       *                                        transmit rate (RFC 1812).
       *                Martin Mares        :        TOS and Precedence set correctly
       *                                        (RFC 1812).
       *                Martin Mares        :        Now copying as much data from the
       *                                        original packet as we can without
       *                                        exceeding 576 bytes (RFC 1812).
       *        Willy Konynenberg        :        Transparent proxying support.
       *                Keith Owens        :        RFC1191 correction for 4.2BSD based
       *                                        path MTU bug.
       *                Thomas Quinot        :        ICMP Dest Unreach codes up to 15 are
       *                                        valid (RFC 1812).
       *                Andi Kleen        :        Check all packet lengths properly
       *                                        and moved all kfree_skb() up to
       *                                        icmp_rcv.
       *                Andi Kleen        :        Move the rate limit bookkeeping
       *                                        into the dest entry and use a token
       *                                        bucket filter (thanks to ANK). Make
       *                                        the rates sysctl configurable.
       *                Yu Tianli        :        Fixed two ugly bugs in icmp_send
       *                                        - IP option length was accounted wrongly
       *                                        - ICMP header length was not accounted
       *                                          at all.
       *              Tristan Greaves :       Added sysctl option to ignore bogus
       *                                      broadcast responses from broken routers.
       *
       * To Fix:
       *
       *        - Should use skb_pull() instead of all the manual checking.
 *          This would also greatly simplify some upper layer error handlers. --AK
       *
       */
      
      #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
      
      #include <linux/module.h>
      #include <linux/types.h>
      #include <linux/jiffies.h>
      #include <linux/kernel.h>
      #include <linux/fcntl.h>
      #include <linux/socket.h>
      #include <linux/in.h>
      #include <linux/inet.h>
      #include <linux/inetdevice.h>
      #include <linux/netdevice.h>
      #include <linux/string.h>
      #include <linux/netfilter_ipv4.h>
      #include <linux/slab.h>
      #include <net/snmp.h>
      #include <net/ip.h>
      #include <net/route.h>
      #include <net/protocol.h>
      #include <net/icmp.h>
      #include <net/tcp.h>
      #include <net/udp.h>
      #include <net/raw.h>
      #include <net/ping.h>
      #include <linux/skbuff.h>
      #include <net/sock.h>