// SPDX-License-Identifier: GPL-2.0-only
      /*
       *  linux/mm/swap.c
       *
       *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
       */
      
      /*
       * This file contains the default values for the operation of the
       * Linux VM subsystem. Fine-tuning documentation can be found in
       * Documentation/admin-guide/sysctl/vm.rst.
       * Started 18.12.91
       * Swap aging added 23.2.95, Stephen Tweedie.
       * Buffermem limits added 12.3.98, Rik van Riel.
       */
      
      #include <linux/mm.h>
      #include <linux/sched.h>
      #include <linux/kernel_stat.h>
      #include <linux/swap.h>
      #include <linux/mman.h>
      #include <linux/pagemap.h>
      #include <linux/pagevec.h>
      #include <linux/init.h>
      #include <linux/export.h>
      #include <linux/mm_inline.h>
      #include <linux/percpu_counter.h>
      #include <linux/memremap.h>
      #include <linux/percpu.h>
      #include <linux/cpu.h>
      #include <linux/notifier.h>
      #include <linux/backing-dev.h>
      #include <linux/memcontrol.h>
      #include <linux/gfp.h>
      #include <linux/uio.h>
      #include <linux/hugetlb.h>
      #include <linux/page_idle.h>
      #include <linux/local_lock.h>
      
      #include "internal.h"
      
      #define CREATE_TRACE_POINTS
      #include <trace/events/pagemap.h>
      
      /* How many pages do we try to swap or page in/out together? */
      int page_cluster;
      
      /* Protecting only lru_rotate.pvec which requires disabling interrupts */
      struct lru_rotate {
              local_lock_t lock;
              struct pagevec pvec;
      };
      static DEFINE_PER_CPU(struct lru_rotate, lru_rotate) = {
              .lock = INIT_LOCAL_LOCK(lock),
      };
      
      /*
 * The following pagevecs are grouped together because they are protected
 * by disabling preemption (and interrupts remain enabled).
       */
      struct lru_pvecs {
              local_lock_t lock;
              struct pagevec lru_add;
              struct pagevec lru_deactivate_file;
              struct pagevec lru_deactivate;
              struct pagevec lru_lazyfree;
      #ifdef CONFIG_SMP
              struct pagevec activate_page;
      #endif
      };
      static DEFINE_PER_CPU(struct lru_pvecs, lru_pvecs) = {
              .lock = INIT_LOCAL_LOCK(lock),
      };
      
      /*
       * This path almost never happens for VM activity - pages are normally
       * freed via pagevecs.  But it gets used by networking.
       */
      static void __page_cache_release(struct page *page)
      {
	if (PageLRU(page)) {
		pg_data_t *pgdat = page_pgdat(page);
		struct lruvec *lruvec;
		unsigned long flags;

		spin_lock_irqsave(&pgdat->lru_lock, flags);
		lruvec = mem_cgroup_page_lruvec(page, pgdat);
		VM_BUG_ON_PAGE(!PageLRU(page), page);
		__ClearPageLRU(page);
		del_page_from_lru_list(page, lruvec, page_off_lru(page));
		spin_unlock_irqrestore(&pgdat->lru_lock, flags);
	}
	__ClearPageWaiters(page);
      }
      
      static void __put_single_page(struct page *page)
      {
	__page_cache_release(page);
              mem_cgroup_uncharge(page);
              free_unref_page(page);
      }
      
      static void __put_compound_page(struct page *page)
      {
              compound_page_dtor *dtor;
      
	/*
	 * __page_cache_release() is supposed to be called for thp, not for
	 * hugetlb. This is because a hugetlb page never has PageLRU set
	 * (it is never added to any LRU list) and no memcg routines should
	 * be called for hugetlb (it has a separate hugetlb_cgroup.)
	 */
	if (!PageHuge(page))
		__page_cache_release(page);
	dtor = get_compound_page_dtor(page);
              (*dtor)(page);
      }
      
      void __put_page(struct page *page)
      {
	if (is_zone_device_page(page)) {
                      put_dev_pagemap(page->pgmap);
      
                      /*
                       * The page belongs to the device that created pgmap. Do
                       * not return it to page allocator.
                       */
                      return;
              }
      
	if (unlikely(PageCompound(page)))
		__put_compound_page(page);
	else
		__put_single_page(page);
      }
      EXPORT_SYMBOL(__put_page);
      
      /**
       * put_pages_list() - release a list of pages
       * @pages: list of pages threaded on page->lru
       *
       * Release a list of pages which are strung together on page.lru.  Currently
       * used by read_cache_pages() and related error recovery code.
       */
      void put_pages_list(struct list_head *pages)
      {
              while (!list_empty(pages)) {
                      struct page *victim;
      
                      victim = lru_to_page(pages);
                      list_del(&victim->lru);
                      put_page(victim);
              }
      }
      EXPORT_SYMBOL(put_pages_list);
      
      /*
       * get_kernel_pages() - pin kernel pages in memory
       * @kiov:        An array of struct kvec structures
       * @nr_segs:        number of segments to pin
       * @write:        pinning for read/write, currently ignored
       * @pages:        array that receives pointers to the pages pinned.
       *                Should be at least nr_segs long.
       *
       * Returns number of pages pinned. This may be fewer than the number
 * requested. If nr_segs is 0 or negative, returns 0. If no pages
       * were pinned, returns -errno. Each page returned must be released
       * with a put_page() call when it is finished with.
       */
      int get_kernel_pages(const struct kvec *kiov, int nr_segs, int write,
                      struct page **pages)
      {
              int seg;
      
              for (seg = 0; seg < nr_segs; seg++) {
                      if (WARN_ON(kiov[seg].iov_len != PAGE_SIZE))
                              return seg;
      
                      pages[seg] = kmap_to_page(kiov[seg].iov_base);
                      get_page(pages[seg]);
              }
      
              return seg;
      }
      EXPORT_SYMBOL_GPL(get_kernel_pages);
      
      /*
       * get_kernel_page() - pin a kernel page in memory
       * @start:        starting kernel address
       * @write:        pinning for read/write, currently ignored
       * @pages:        array that receives pointer to the page pinned.
 *                Must have room for at least one page.
       *
       * Returns 1 if page is pinned. If the page was not pinned, returns
       * -errno. The page returned must be released with a put_page() call
       * when it is finished with.
       */
      int get_kernel_page(unsigned long start, int write, struct page **pages)
      {
              const struct kvec kiov = {
                      .iov_base = (void *)start,
                      .iov_len = PAGE_SIZE
              };
      
              return get_kernel_pages(&kiov, 1, write, pages);
      }
      EXPORT_SYMBOL_GPL(get_kernel_page);
      
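/*
 * Run @move_fn on each page in @pvec under the owning node's lru_lock,
 * re-taking the lock only when the node changes, then drop the
 * pagevec's page references and reinitialise it.
 */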
      static void pagevec_lru_move_fn(struct pagevec *pvec,
              void (*move_fn)(struct page *page, struct lruvec *lruvec, void *arg),
              void *arg)
      {
              int i;
              struct pglist_data *pgdat = NULL;
              struct lruvec *lruvec;
              unsigned long flags = 0;
      
	for (i = 0; i < pagevec_count(pvec); i++) {
		struct page *page = pvec->pages[i];
		struct pglist_data *pagepgdat = page_pgdat(page);

		if (pagepgdat != pgdat) {
			if (pgdat)
				spin_unlock_irqrestore(&pgdat->lru_lock, flags);
			pgdat = pagepgdat;
			spin_lock_irqsave(&pgdat->lru_lock, flags);
		}

		lruvec = mem_cgroup_page_lruvec(page, pgdat);
		(*move_fn)(page, lruvec, arg);
	}
	if (pgdat)
		spin_unlock_irqrestore(&pgdat->lru_lock, flags);
	release_pages(pvec->pages, pvec->nr);
              pagevec_reinit(pvec);
      }
      
static void pagevec_move_tail_fn(struct page *page, struct lruvec *lruvec,
                                       void *arg)
      {
              int *pgmoved = arg;
      
	if (PageLRU(page) && !PageUnevictable(page)) {
		del_page_from_lru_list(page, lruvec, page_lru(page));
		ClearPageActive(page);
		add_page_to_lru_list_tail(page, lruvec, page_lru(page));
		(*pgmoved)++;
	}
              }
      }
      
      /*
       * pagevec_move_tail() must be called with IRQ disabled.
       * Otherwise this may cause nasty races.
       */
      static void pagevec_move_tail(struct pagevec *pvec)
      {
	int pgmoved = 0;
      
              pagevec_lru_move_fn(pvec, pagevec_move_tail_fn, &pgmoved);
              __count_vm_events(PGROTATED, pgmoved);
      }
      
      /*
       * Writeback is about to end against a page which has been marked for immediate
       * reclaim.  If it still appears to be reclaimable, move it to the tail of the
       * inactive list.
       */
      void rotate_reclaimable_page(struct page *page)
      {
              if (!PageLocked(page) && !PageDirty(page) &&
                  !PageUnevictable(page) && PageLRU(page)) {
                      struct pagevec *pvec;
                      unsigned long flags;
      
                      get_page(page);
                      local_lock_irqsave(&lru_rotate.lock, flags);
                      pvec = this_cpu_ptr(&lru_rotate.pvec);
                      if (!pagevec_add(pvec, page) || PageCompound(page))
                              pagevec_move_tail(pvec);
                      local_unlock_irqrestore(&lru_rotate.lock, flags);
              }
      }
      
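/*
 * Update the lruvec's recent_scanned/recent_rotated statistics, which
 * vmscan uses to balance reclaim pressure between the anon and file
 * LRU lists.
 */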
      static void update_page_reclaim_stat(struct lruvec *lruvec,
                                           int file, int rotated)
      {
              struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
      
              reclaim_stat->recent_scanned[file]++;
              if (rotated)
		reclaim_stat->recent_rotated[file]++;
      }
      
      static void __activate_page(struct page *page, struct lruvec *lruvec,
                                  void *arg)
      {
	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
		int file = page_is_file_lru(page);
		int lru = page_lru_base_type(page);

		del_page_from_lru_list(page, lruvec, lru);
		SetPageActive(page);
		lru += LRU_ACTIVE;
		add_page_to_lru_list(page, lruvec, lru);
		trace_mm_lru_activate(page);
      
                      __count_vm_event(PGACTIVATE);
                      update_page_reclaim_stat(lruvec, file, 1);
              }
      }
      
      #ifdef CONFIG_SMP
      static void activate_page_drain(int cpu)
      {
	struct pagevec *pvec = &per_cpu(lru_pvecs.activate_page, cpu);

	if (pagevec_count(pvec))
		pagevec_lru_move_fn(pvec, __activate_page, NULL);
      }
      
      static bool need_activate_page_drain(int cpu)
      {
              return pagevec_count(&per_cpu(lru_pvecs.activate_page, cpu)) != 0;
      }
      
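/*
 * Queue an inactive LRU page for promotion to the active list via the
 * per-CPU activate_page pagevec; the pagevec is flushed once it fills
 * up or when a compound page is added.
 */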
      void activate_page(struct page *page)
      {
	page = compound_head(page);
	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
		struct pagevec *pvec;

		local_lock(&lru_pvecs.lock);
		pvec = this_cpu_ptr(&lru_pvecs.activate_page);
		get_page(page);
		if (!pagevec_add(pvec, page) || PageCompound(page))
			pagevec_lru_move_fn(pvec, __activate_page, NULL);
		local_unlock(&lru_pvecs.lock);
              }
      }
      
      #else
      static inline void activate_page_drain(int cpu)
      {
      }
      
      void activate_page(struct page *page)
      {
              pg_data_t *pgdat = page_pgdat(page);
      
              page = compound_head(page);
              spin_lock_irq(&pgdat->lru_lock);
              __activate_page(page, mem_cgroup_page_lruvec(page, pgdat), NULL);
              spin_unlock_irq(&pgdat->lru_lock);
      }
      #endif
      
      static void __lru_cache_activate_page(struct page *page)
      {
              struct pagevec *pvec;
              int i;
      
	local_lock(&lru_pvecs.lock);
              pvec = this_cpu_ptr(&lru_pvecs.lru_add);
      
              /*
               * Search backwards on the optimistic assumption that the page being
               * activated has just been added to this pagevec. Note that only
               * the local pagevec is examined as a !PageLRU page could be in the
               * process of being released, reclaimed, migrated or on a remote
               * pagevec that is currently being drained. Furthermore, marking
               * a remote pagevec's page PageActive potentially hits a race where
               * a page is marked PageActive just after it is added to the inactive
               * list causing accounting errors and BUG_ON checks to trigger.
               */
	for (i = pagevec_count(pvec) - 1; i >= 0; i--) {
		struct page *pagevec_page = pvec->pages[i];

		if (pagevec_page == page) {
			SetPageActive(page);
                              break;
                      }
              }
      
	local_unlock(&lru_pvecs.lock);
      }
      
      /*
       * Mark a page as having seen activity.
       *
       * inactive,unreferenced        ->        inactive,referenced
       * inactive,referenced                ->        active,unreferenced
       * active,unreferenced                ->        active,referenced
       *
 * When a newly allocated page is not yet visible to others (and hence safe
 * for non-atomic ops), __SetPageReferenced(page) may be substituted for
 * mark_page_accessed(page).
       */
      void mark_page_accessed(struct page *page)
      {
	page = compound_head(page);

	if (!PageReferenced(page)) {
		SetPageReferenced(page);
	} else if (PageUnevictable(page)) {
		/*
		 * Unevictable pages are on the "LRU_UNEVICTABLE" list. But,
		 * this list is never rotated or maintained, so marking an
		 * unevictable page accessed has no effect.
		 */
	} else if (!PageActive(page)) {
		/*
		 * If the page is on the LRU, queue it for activation via
		 * lru_pvecs.activate_page. Otherwise, assume the page is on a
		 * pagevec, mark it active and it'll be moved to the active
		 * LRU on the next drain.
		 */
		if (PageLRU(page))
			activate_page(page);
		else
			__lru_cache_activate_page(page);
		ClearPageReferenced(page);
		if (page_is_file_lru(page))
			workingset_activation(page);
	}
	if (page_is_idle(page))
                      clear_page_idle(page);
      }
      EXPORT_SYMBOL(mark_page_accessed);
      
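/*
 * Take a reference on @page and queue it on the current CPU's lru_add
 * pagevec; the pagevec is drained onto the LRU lists once it is full
 * or when a compound page is added.
 */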
      static void __lru_cache_add(struct page *page)
      {
              struct pagevec *pvec;
      
	local_lock(&lru_pvecs.lock);
	pvec = this_cpu_ptr(&lru_pvecs.lru_add);
	get_page(page);
	if (!pagevec_add(pvec, page) || PageCompound(page))
		__pagevec_lru_add(pvec);
	local_unlock(&lru_pvecs.lock);
      }
      
      /**
       * lru_cache_add_anon - add a page to the page lists
       * @page: the page to add
       */
      void lru_cache_add_anon(struct page *page)
      {
              if (PageActive(page))
                      ClearPageActive(page);
              __lru_cache_add(page);
      }
      
      void lru_cache_add_file(struct page *page)
      {
              if (PageActive(page))
                      ClearPageActive(page);
              __lru_cache_add(page);
      }
      EXPORT_SYMBOL(lru_cache_add_file);
      
      /**
       * lru_cache_add - add a page to a page list
       * @page: the page to be added to the LRU.
       *
 * Queue the page for addition to the LRU via pagevec. The decision on whether
 * to add the page to the [in]active [file|anon] list is deferred until the
 * pagevec is drained. This gives the caller of lru_cache_add() a chance to
 * have the page added to the active list using mark_page_accessed().
       */
      void lru_cache_add(struct page *page)
      {
	VM_BUG_ON_PAGE(PageActive(page) && PageUnevictable(page), page);
	VM_BUG_ON_PAGE(PageLRU(page), page);
	__lru_cache_add(page);
      }
      
      /**
       * lru_cache_add_active_or_unevictable
       * @page:  the page to be added to LRU
       * @vma:   vma in which page is mapped for determining reclaimability
       *
 * Place @page on the active or unevictable LRU list, depending on its
 * evictability.  Note that if the page is not evictable, it goes
 * directly back onto its zone's unevictable list; it does NOT use a
 * per-cpu pagevec.
       */
      void lru_cache_add_active_or_unevictable(struct page *page,
                                               struct vm_area_struct *vma)
      {
	VM_BUG_ON_PAGE(PageLRU(page), page);

	if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED))
		SetPageActive(page);
	else if (!TestSetPageMlocked(page)) {
		/*
		 * We use the irq-unsafe __mod_zone_page_state because this
		 * counter is not modified from interrupt context, and the pte
		 * lock is held (spinlock), which implies preemption disabled.
		 */
		__mod_zone_page_state(page_zone(page), NR_MLOCK,
				    hpage_nr_pages(page));
		count_vm_event(UNEVICTABLE_PGMLOCKED);
	}
	lru_cache_add(page);
      }
      
      /*
       * If the page can not be invalidated, it is moved to the
       * inactive list to speed up its reclaim.  It is moved to the
       * head of the list, rather than the tail, to give the flusher
       * threads some time to write it out, as this is much more
       * effective than the single-page writeout from reclaim.
       *
 * If the page isn't mapped and is dirty or under writeback, it can be
 * reclaimed ASAP by using PG_reclaim.
       *
       * 1. active, mapped page -> none
       * 2. active, dirty/writeback page -> inactive, head, PG_reclaim
       * 3. inactive, mapped page -> none
       * 4. inactive, dirty/writeback page -> inactive, head, PG_reclaim
       * 5. inactive, clean -> inactive, tail
       * 6. Others -> none
       *
 * In case 4, the page is moved to the head of the inactive list because
 * the VM expects it to be written out by the flusher threads, which is
 * much more effective than single-page writeout from reclaim.
       */
static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec,
                                    void *arg)
      {
              int lru, file;
              bool active;
      
	if (!PageLRU(page))
		return;

	if (PageUnevictable(page))
		return;

	/* Some processes are using the page */
	if (page_mapped(page))
		return;

	active = PageActive(page);
	file = page_is_file_lru(page);
	lru = page_lru_base_type(page);

	del_page_from_lru_list(page, lruvec, lru + active);
	ClearPageActive(page);
	ClearPageReferenced(page);
      
	if (PageWriteback(page) || PageDirty(page)) {
		/*
		 * Setting PG_reclaim can race with end_page_writeback(),
		 * which can confuse readahead.  But the race window is
		 * _really_ small and it's a non-critical problem.
		 */
		add_page_to_lru_list(page, lruvec, lru);
		SetPageReclaim(page);
	} else {
		/*
		 * The page's writeback has already ended, so move the
		 * page to the tail of the inactive list.
		 */
		add_page_to_lru_list_tail(page, lruvec, lru);
		__count_vm_event(PGROTATED);
	}

	if (active)
                      __count_vm_event(PGDEACTIVATE);
              update_page_reclaim_stat(lruvec, file, 0);
      }
      
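/*
 * Pagevec callback for deactivate_page(): move an active, evictable
 * page to the head of the inactive list and clear its active and
 * referenced state.
 */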
      static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec,
                                  void *arg)
      {
              if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
                      int file = page_is_file_lru(page);
                      int lru = page_lru_base_type(page);
      
                      del_page_from_lru_list(page, lruvec, lru + LRU_ACTIVE);
                      ClearPageActive(page);
                      ClearPageReferenced(page);
                      add_page_to_lru_list(page, lruvec, lru);
      
                      __count_vm_events(PGDEACTIVATE, hpage_nr_pages(page));
                      update_page_reclaim_stat(lruvec, file, 0);
              }
      }
      
      static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec,
                                  void *arg)
      {
              if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) &&
                  !PageSwapCache(page) && !PageUnevictable(page)) {
                      bool active = PageActive(page);
      
                      del_page_from_lru_list(page, lruvec,
                                             LRU_INACTIVE_ANON + active);
                      ClearPageActive(page);
                      ClearPageReferenced(page);
                      /*
		 * Lazyfree pages are clean anonymous pages.  They have the
		 * PG_swapbacked flag cleared, to distinguish them from
		 * normal anonymous pages.
                       */
                      ClearPageSwapBacked(page);
                      add_page_to_lru_list(page, lruvec, LRU_INACTIVE_FILE);
      
                      __count_vm_events(PGLAZYFREE, hpage_nr_pages(page));
                      count_memcg_page_event(page, PGLAZYFREE);
                      update_page_reclaim_stat(lruvec, 1, 0);
              }
      }
      
      /*
       * Drain pages out of the cpu's pagevecs.
       * Either "cpu" is the current CPU, and preemption has already been
       * disabled; or "cpu" is being hot-unplugged, and is already dead.
       */
      void lru_add_drain_cpu(int cpu)
      {
	struct pagevec *pvec = &per_cpu(lru_pvecs.lru_add, cpu);

	if (pagevec_count(pvec))
		__pagevec_lru_add(pvec);

	pvec = &per_cpu(lru_rotate.pvec, cpu);
	if (pagevec_count(pvec)) {
		unsigned long flags;

		/* No harm done if a racing interrupt already did this */
		local_lock_irqsave(&lru_rotate.lock, flags);
		pagevec_move_tail(pvec);
		local_unlock_irqrestore(&lru_rotate.lock, flags);
	}

	pvec = &per_cpu(lru_pvecs.lru_deactivate_file, cpu);
	if (pagevec_count(pvec))
		pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);

	pvec = &per_cpu(lru_pvecs.lru_deactivate, cpu);
	if (pagevec_count(pvec))
		pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);

	pvec = &per_cpu(lru_pvecs.lru_lazyfree, cpu);
	if (pagevec_count(pvec))
		pagevec_lru_move_fn(pvec, lru_lazyfree_fn, NULL);

	activate_page_drain(cpu);
      }
      
      /**
       * deactivate_file_page - forcefully deactivate a file page
       * @page: page to deactivate
       *
       * This function hints the VM that @page is a good reclaim candidate,
       * for example if its invalidation fails due to the page being dirty
       * or under writeback.
       */
      void deactivate_file_page(struct page *page)
      {
	/*
	 * In a workload with many unevictable pages such as mprotect,
	 * deactivating unevictable pages to accelerate reclaim is pointless.
	 */
	if (PageUnevictable(page))
		return;

	if (likely(get_page_unless_zero(page))) {
		struct pagevec *pvec;

		local_lock(&lru_pvecs.lock);
		pvec = this_cpu_ptr(&lru_pvecs.lru_deactivate_file);

		if (!pagevec_add(pvec, page) || PageCompound(page))
			pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
		local_unlock(&lru_pvecs.lock);
              }
      }
      
      /*
       * deactivate_page - deactivate a page
       * @page: page to deactivate
       *
       * deactivate_page() moves @page to the inactive list if @page was on the active
       * list and was not an unevictable page.  This is done to accelerate the reclaim
       * of @page.
       */
      void deactivate_page(struct page *page)
      {
              if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
                      struct pagevec *pvec;
      
                      local_lock(&lru_pvecs.lock);
                      pvec = this_cpu_ptr(&lru_pvecs.lru_deactivate);
                      get_page(page);
                      if (!pagevec_add(pvec, page) || PageCompound(page))
                              pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
                      local_unlock(&lru_pvecs.lock);
              }
      }
      
      /**
       * mark_page_lazyfree - make an anon page lazyfree
       * @page: page to deactivate
       *
       * mark_page_lazyfree() moves @page to the inactive file list.
       * This is done to accelerate the reclaim of @page.
       */
      void mark_page_lazyfree(struct page *page)
      {
              if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) &&
                  !PageSwapCache(page) && !PageUnevictable(page)) {
                      struct pagevec *pvec;
      
                      local_lock(&lru_pvecs.lock);
                      pvec = this_cpu_ptr(&lru_pvecs.lru_lazyfree);
                      get_page(page);
                      if (!pagevec_add(pvec, page) || PageCompound(page))
                              pagevec_lru_move_fn(pvec, lru_lazyfree_fn, NULL);
                      local_unlock(&lru_pvecs.lock);
              }
      }
      
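/*
 * Flush all of the current CPU's pagevecs onto the LRU lists.
 */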
      void lru_add_drain(void)
      {
	local_lock(&lru_pvecs.lock);
	lru_add_drain_cpu(smp_processor_id());
	local_unlock(&lru_pvecs.lock);
      }
      
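/*
 * As lru_add_drain(), but additionally drain the current CPU's
 * per-cpu free page lists for @zone.
 */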
      void lru_add_drain_cpu_zone(struct zone *zone)
      {
              local_lock(&lru_pvecs.lock);
              lru_add_drain_cpu(smp_processor_id());
              drain_local_pages(zone);
              local_unlock(&lru_pvecs.lock);
      }
      
      #ifdef CONFIG_SMP
      
      static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
      
      static void lru_add_drain_per_cpu(struct work_struct *dummy)
      {
              lru_add_drain();
      }
      
      /*
       * Doesn't need any cpu hotplug locking because we do rely on per-cpu
       * kworkers being shut down before our page_alloc_cpu_dead callback is
       * executed on the offlined cpu.
       * Calling this function with cpu hotplug locks held can actually lead
       * to obscure indirect dependencies via WQ context.
       */
      void lru_add_drain_all(void)
      {
              static seqcount_t seqcount = SEQCNT_ZERO(seqcount);
              static DEFINE_MUTEX(lock);
              static struct cpumask has_work;
              int cpu, seq;
      
              /*
               * Make sure nobody triggers this path before mm_percpu_wq is fully
               * initialized.
               */
              if (WARN_ON(!mm_percpu_wq))
                      return;
      
              seq = raw_read_seqcount_latch(&seqcount);
      
              mutex_lock(&lock);
      
              /*
               * Piggyback on drain started and finished while we waited for lock:
               * all pages pended at the time of our enter were drained from vectors.
               */
              if (__read_seqcount_retry(&seqcount, seq))
                      goto done;
      
              raw_write_seqcount_latch(&seqcount);
      
              cpumask_clear(&has_work);
      
              for_each_online_cpu(cpu) {
                      struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
      
                      if (pagevec_count(&per_cpu(lru_pvecs.lru_add, cpu)) ||
                          pagevec_count(&per_cpu(lru_rotate.pvec, cpu)) ||
                          pagevec_count(&per_cpu(lru_pvecs.lru_deactivate_file, cpu)) ||
                          pagevec_count(&per_cpu(lru_pvecs.lru_deactivate, cpu)) ||
                          pagevec_count(&per_cpu(lru_pvecs.lru_lazyfree, cpu)) ||
                          need_activate_page_drain(cpu)) {
                              INIT_WORK(work, lru_add_drain_per_cpu);
                              queue_work_on(cpu, mm_percpu_wq, work);
                              cpumask_set_cpu(cpu, &has_work);
                      }
              }
      
              for_each_cpu(cpu, &has_work)
                      flush_work(&per_cpu(lru_add_drain_work, cpu));
      
      done:
              mutex_unlock(&lock);
      }
      #else
      void lru_add_drain_all(void)
      {
              lru_add_drain();
      }
      #endif
      
      /**
       * release_pages - batched put_page()
       * @pages: array of pages to release
       * @nr: number of pages
       *
       * Decrement the reference count on all the pages in @pages.  If it
       * fell to zero, remove the page from the LRU and free it.
       */
      void release_pages(struct page **pages, int nr)
      {
              int i;
	LIST_HEAD(pages_to_free);
              struct pglist_data *locked_pgdat = NULL;
              struct lruvec *lruvec;
              unsigned long uninitialized_var(flags);
              unsigned int uninitialized_var(lock_batch);
      
	for (i = 0; i < nr; i++) {
		struct page *page = pages[i];

		/*
		 * Make sure the IRQ-safe lock-holding time does not get
		 * excessive with a continuous string of pages from the
		 * same pgdat. The lock is held only if pgdat != NULL.
		 */
		if (locked_pgdat && ++lock_batch == SWAP_CLUSTER_MAX) {
			spin_unlock_irqrestore(&locked_pgdat->lru_lock, flags);
			locked_pgdat = NULL;
		}

		if (is_huge_zero_page(page))
			continue;

		if (is_zone_device_page(page)) {
                              if (locked_pgdat) {
                                      spin_unlock_irqrestore(&locked_pgdat->lru_lock,
                                                             flags);
                                      locked_pgdat = NULL;
                              }
                              /*
                               * ZONE_DEVICE pages that return 'false' from
                               * put_devmap_managed_page() do not require special
                               * processing, and instead, expect a call to
                               * put_page_testzero().
                               */
                              if (page_is_devmap_managed(page)) {
                                      put_devmap_managed_page(page);
                                      continue;
                              }
                      }
      
		page = compound_head(page);
		if (!put_page_testzero(page))
			continue;

		if (PageCompound(page)) {
			if (locked_pgdat) {
				spin_unlock_irqrestore(&locked_pgdat->lru_lock, flags);
				locked_pgdat = NULL;
			}
			__put_compound_page(page);
			continue;
		}

		if (PageLRU(page)) {
			struct pglist_data *pgdat = page_pgdat(page);

			if (pgdat != locked_pgdat) {
				if (locked_pgdat)
					spin_unlock_irqrestore(&locked_pgdat->lru_lock,
									flags);
				lock_batch = 0;
				locked_pgdat = pgdat;
				spin_lock_irqsave(&locked_pgdat->lru_lock, flags);
			}

			lruvec = mem_cgroup_page_lruvec(page, locked_pgdat);
			VM_BUG_ON_PAGE(!PageLRU(page), page);
			__ClearPageLRU(page);
			del_page_from_lru_list(page, lruvec, page_off_lru(page));
		}

		/* Clear Active bit in case of parallel mark_page_accessed */
		__ClearPageActive(page);
		__ClearPageWaiters(page);

		list_add(&page->lru, &pages_to_free);
	}
	if (locked_pgdat)
		spin_unlock_irqrestore(&locked_pgdat->lru_lock, flags);

	mem_cgroup_uncharge_list(&pages_to_free);
              free_unref_page_list(&pages_to_free);
      }
      EXPORT_SYMBOL(release_pages);
      
      /*
       * The pages which we're about to release may be in the deferred lru-addition
       * queues.  That would prevent them from really being freed right now.  That's
       * OK from a correctness point of view but is inefficient - those pages may be
       * cache-warm and we want to give them back to the page allocator ASAP.
       *
       * So __pagevec_release() will drain those queues here.  __pagevec_lru_add()
       * and __pagevec_lru_add_active() call release_pages() directly to avoid
       * mutual recursion.
       */
      void __pagevec_release(struct pagevec *pvec)
      {
	if (!pvec->percpu_pvec_drained) {
		lru_add_drain();
		pvec->percpu_pvec_drained = true;
	}
	release_pages(pvec->pages, pagevec_count(pvec));
              pagevec_reinit(pvec);
      }
      EXPORT_SYMBOL(__pagevec_release);
      
      #ifdef CONFIG_TRANSPARENT_HUGEPAGE
      /* used by __split_huge_page_refcount() */
      void lru_add_page_tail(struct page *page, struct page *page_tail,
                             struct lruvec *lruvec, struct list_head *list)
      {
              const int file = 0;
      
              VM_BUG_ON_PAGE(!PageHead(page), page);
              VM_BUG_ON_PAGE(PageCompound(page_tail), page);
              VM_BUG_ON_PAGE(PageLRU(page_tail), page);
              lockdep_assert_held(&lruvec_pgdat(lruvec)->lru_lock);
      
              if (!list)
                      SetPageLRU(page_tail);
      
              if (likely(PageLRU(page)))
                      list_add_tail(&page_tail->lru, &page->lru);
              else if (list) {
                      /* page reclaim is reclaiming a huge page */
                      get_page(page_tail);
                      list_add_tail(&page_tail->lru, list);
              } else {
                      /*
                       * Head page has not yet been counted, as an hpage,
                       * so we must account for each subpage individually.
                       *
                       * Put page_tail on the list at the correct position
                       * so they all end up in order.
                       */
                      add_page_to_lru_list_tail(page_tail, lruvec,
                                                page_lru(page_tail));
              }
      
              if (!PageUnevictable(page))
                      update_page_reclaim_stat(lruvec, file, PageActive(page_tail));
      }
      #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
      
      static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec,
                                       void *arg)
      {
              enum lru_list lru;
	int was_unevictable = TestClearPageUnevictable(page);

	VM_BUG_ON_PAGE(PageLRU(page), page);
      
              /*
               * Page becomes evictable in two ways:
               * 1) Within LRU lock [munlock_vma_page() and __munlock_pagevec()].
               * 2) Before acquiring LRU lock to put the page to correct LRU and then
               *   a) do PageLRU check with lock [check_move_unevictable_pages]
               *   b) do PageLRU check before lock [clear_page_mlock]
               *
               * (1) & (2a) are ok as LRU lock will serialize them. For (2b), we need
               * following strict ordering:
               *
               * #0: __pagevec_lru_add_fn                #1: clear_page_mlock
               *
               * SetPageLRU()                                TestClearPageMlocked()
               * smp_mb() // explicit ordering        // above provides strict
               *                                        // ordering
               * PageMlocked()                        PageLRU()
               *
               *
               * if '#1' does not observe setting of PG_lru by '#0' and fails
               * isolation, the explicit barrier will make sure that page_evictable
               * check will put the page in correct LRU. Without smp_mb(), SetPageLRU
               * can be reordered after PageMlocked check and can make '#1' to fail
               * the isolation of the page whose Mlocked bit is cleared (#0 is also
               * looking at the same page) and the evictable page will be stranded
               * in an unevictable LRU.
               */
	SetPageLRU(page);
	smp_mb__after_atomic();

	if (page_evictable(page)) {
		lru = page_lru(page);
		update_page_reclaim_stat(lruvec, page_is_file_lru(page),
					 PageActive(page));
		if (was_unevictable)
                              count_vm_event(UNEVICTABLE_PGRESCUED);
              } else {
                      lru = LRU_UNEVICTABLE;
                      ClearPageActive(page);
                      SetPageUnevictable(page);
                      if (!was_unevictable)
                              count_vm_event(UNEVICTABLE_PGCULLED);
              }
      
	add_page_to_lru_list(page, lruvec, lru);
	trace_mm_lru_insertion(page, lru);
      }
      
      /*
       * Add the passed pages to the LRU, then drop the caller's refcount
       * on them.  Reinitialises the caller's pagevec.
       */
      void __pagevec_lru_add(struct pagevec *pvec)
      {
	pagevec_lru_move_fn(pvec, __pagevec_lru_add_fn, NULL);
      }
      
      /**
       * pagevec_lookup_entries - gang pagecache lookup
       * @pvec:        Where the resulting entries are placed
       * @mapping:        The address_space to search
       * @start:        The starting entry index
 * @nr_entries:        The maximum number of entries
       * @indices:        The cache indices corresponding to the entries in @pvec
       *
       * pagevec_lookup_entries() will search for and return a group of up
 * to @nr_entries pages and shadow entries in the mapping.  All
       * entries are placed in @pvec.  pagevec_lookup_entries() takes a
       * reference against actual pages in @pvec.
       *
       * The search returns a group of mapping-contiguous entries with
       * ascending indexes.  There may be holes in the indices due to
       * not-present entries.
       *
       * Only one subpage of a Transparent Huge Page is returned in one call:
       * allowing truncate_inode_pages_range() to evict the whole THP without
       * cycling through a pagevec of extra references.
       *
       * pagevec_lookup_entries() returns the number of entries which were
       * found.
       */
      unsigned pagevec_lookup_entries(struct pagevec *pvec,
                                      struct address_space *mapping,
                                      pgoff_t start, unsigned nr_entries,
                                      pgoff_t *indices)
      {
	pvec->nr = find_get_entries(mapping, start, nr_entries,
				    pvec->pages, indices);
              return pagevec_count(pvec);
      }
      
      /**
       * pagevec_remove_exceptionals - pagevec exceptionals pruning
       * @pvec:        The pagevec to prune
       *
       * pagevec_lookup_entries() fills both pages and exceptional radix
       * tree entries into the pagevec.  This function prunes all
       * exceptionals from @pvec without leaving holes, so that it can be
       * passed on to page-only pagevec operations.
       */
      void pagevec_remove_exceptionals(struct pagevec *pvec)
      {
              int i, j;
      
	for (i = 0, j = 0; i < pagevec_count(pvec); i++) {
		struct page *page = pvec->pages[i];
		if (!xa_is_value(page))
			pvec->pages[j++] = page;
	}
	pvec->nr = j;
      }
      
      /**
       * pagevec_lookup_range - gang pagecache lookup
       * @pvec:        Where the resulting pages are placed
       * @mapping:        The address_space to search
       * @start:        The starting page index
       * @end:        The final page index
       *
 * pagevec_lookup_range() will search for & return a group of up to PAGEVEC_SIZE
 * pages in the mapping starting from index @start and up to index @end
 * (inclusive).  The pages are placed in @pvec.  pagevec_lookup_range() takes a
 * reference against the pages in @pvec.
       *
       * The search returns a group of mapping-contiguous pages with ascending
       * indexes.  There may be holes in the indices due to not-present pages. We
       * also update @start to index the next page for the traversal.
       *
       * pagevec_lookup_range() returns the number of pages which were found. If this
       * number is smaller than PAGEVEC_SIZE, the end of specified range has been
       * reached.
       */
      unsigned pagevec_lookup_range(struct pagevec *pvec,
                      struct address_space *mapping, pgoff_t *start, pgoff_t end)
      {
	pvec->nr = find_get_pages_range(mapping, start, end, PAGEVEC_SIZE,
					pvec->pages);
              return pagevec_count(pvec);
      }
      EXPORT_SYMBOL(pagevec_lookup_range);
      
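/*
 * Like pagevec_lookup_range(), but only return pages that are tagged
 * with @tag in the page cache (e.g. PAGECACHE_TAG_DIRTY).
 */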
      unsigned pagevec_lookup_range_tag(struct pagevec *pvec,
                      struct address_space *mapping, pgoff_t *index, pgoff_t end,
                      xa_mark_t tag)
      {
	pvec->nr = find_get_pages_range_tag(mapping, index, end, tag,
					PAGEVEC_SIZE, pvec->pages);
              return pagevec_count(pvec);
      }
      EXPORT_SYMBOL(pagevec_lookup_range_tag);
      
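/*
 * As pagevec_lookup_range_tag(), but return at most
 * min(@max_pages, PAGEVEC_SIZE) pages.
 */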
      unsigned pagevec_lookup_range_nr_tag(struct pagevec *pvec,
                      struct address_space *mapping, pgoff_t *index, pgoff_t end,
                      xa_mark_t tag, unsigned max_pages)
      {
              pvec->nr = find_get_pages_range_tag(mapping, index, end, tag,
                      min_t(unsigned int, max_pages, PAGEVEC_SIZE), pvec->pages);
              return pagevec_count(pvec);
      }
      EXPORT_SYMBOL(pagevec_lookup_range_nr_tag);
      /*
       * Perform any setup for the swap system
       */
      void __init swap_setup(void)
      {
              unsigned long megs = totalram_pages() >> (20 - PAGE_SHIFT);
      
              /* Use a smaller cluster for small-memory machines */
              if (megs < 16)
                      page_cluster = 2;
              else
                      page_cluster = 3;
              /*
	 * Right now other parts of the system mean that we
               * _really_ don't want to cluster much more
               */
      }
      
      #ifdef CONFIG_DEV_PAGEMAP_OPS
      void put_devmap_managed_page(struct page *page)
      {
              int count;
      
              if (WARN_ON_ONCE(!page_is_devmap_managed(page)))
                      return;
      
              count = page_ref_dec_return(page);
      
              /*
               * devmap page refcounts are 1-based, rather than 0-based: if
               * refcount is 1, then the page is free and the refcount is
               * stable because nobody holds a reference on the page.
               */
              if (count == 1)
                      free_devmap_managed_page(page);
              else if (!count)
                      __put_page(page);
      }
      EXPORT_SYMBOL(put_devmap_managed_page);
      #endif
      /*
       * Copyright (c) 2018 Cumulus Networks. All rights reserved.
       * Copyright (c) 2018 David Ahern <dsa@cumulusnetworks.com>
       * Copyright (c) 2019 Mellanox Technologies. All rights reserved.
       *
       * This software is licensed under the GNU General License Version 2,
       * June 1991 as shown in the file COPYING in the top-level directory of this
       * source tree.
       *
       * THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS"
       * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
       * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
       * FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE
       * OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME
       * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
       */
      
      #include <linux/debugfs.h>
      #include <linux/device.h>
      #include <linux/etherdevice.h>
      #include <linux/inet.h>
      #include <linux/jiffies.h>
      #include <linux/kernel.h>
      #include <linux/list.h>
      #include <linux/mutex.h>
      #include <linux/random.h>
      #include <linux/rtnetlink.h>
      #include <linux/workqueue.h>
      #include <net/devlink.h>
      #include <net/ip.h>
      #include <net/flow_offload.h>
      #include <uapi/linux/devlink.h>
      #include <uapi/linux/ip.h>
      #include <uapi/linux/udp.h>
      
      #include "netdevsim.h"
      
      static struct dentry *nsim_dev_ddir;
      
      #define NSIM_DEV_DUMMY_REGION_SIZE (1024 * 32)
      
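/*
 * Devlink region snapshot callback: netdevsim has no real device
 * memory, so fill the dummy region with random bytes.
 */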
      static int
      nsim_dev_take_snapshot(struct devlink *devlink, struct netlink_ext_ack *extack,
                             u8 **data)
      {
              void *dummy_data;
      
              dummy_data = kmalloc(NSIM_DEV_DUMMY_REGION_SIZE, GFP_KERNEL);
              if (!dummy_data)
                      return -ENOMEM;
      
              get_random_bytes(dummy_data, NSIM_DEV_DUMMY_REGION_SIZE);
      
              *data = dummy_data;
      
              return 0;
      }
      
      static ssize_t nsim_dev_take_snapshot_write(struct file *file,
                                                  const char __user *data,
                                                  size_t count, loff_t *ppos)
      {
              struct nsim_dev *nsim_dev = file->private_data;
              struct devlink *devlink;
              u8 *dummy_data;
              int err;
              u32 id;
      
              devlink = priv_to_devlink(nsim_dev);
      
              err = nsim_dev_take_snapshot(devlink, NULL, &dummy_data);
              if (err)
                      return err;
      
              err = devlink_region_snapshot_id_get(devlink, &id);
              if (err) {
                      pr_err("Failed to get snapshot id\n");
                      kfree(dummy_data);
                      return err;
              }
              err = devlink_region_snapshot_create(nsim_dev->dummy_region,
                                                   dummy_data, id);
              devlink_region_snapshot_id_put(devlink, id);
              if (err) {
                      pr_err("Failed to create region snapshot\n");
                      kfree(dummy_data);
                      return err;
              }
      
              return count;
      }
      
      static const struct file_operations nsim_dev_take_snapshot_fops = {
              .open = simple_open,
              .write = nsim_dev_take_snapshot_write,
              .llseek = generic_file_llseek,
      };
      
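/*
 * Expose the currently configured flow-action cookie as a hex string
 * via debugfs.
 */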
      static ssize_t nsim_dev_trap_fa_cookie_read(struct file *file,
                                                  char __user *data,
                                                  size_t count, loff_t *ppos)
      {
              struct nsim_dev *nsim_dev = file->private_data;
              struct flow_action_cookie *fa_cookie;
              unsigned int buf_len;
              ssize_t ret;
              char *buf;
      
              spin_lock(&nsim_dev->fa_cookie_lock);
              fa_cookie = nsim_dev->fa_cookie;
              if (!fa_cookie) {
                      ret = -EINVAL;
                      goto errout;
              }
              buf_len = fa_cookie->cookie_len * 2;
              buf = kmalloc(buf_len, GFP_ATOMIC);
              if (!buf) {
                      ret = -ENOMEM;
                      goto errout;
              }
              bin2hex(buf, fa_cookie->cookie, fa_cookie->cookie_len);
              spin_unlock(&nsim_dev->fa_cookie_lock);
      
              ret = simple_read_from_buffer(data, count, ppos, buf, buf_len);
      
              kfree(buf);
              return ret;
      
      errout:
              spin_unlock(&nsim_dev->fa_cookie_lock);
              return ret;
      }
      
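/*
 * Parse a hex-encoded cookie written to debugfs and install it as the
 * device's flow-action cookie.
 */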
      static ssize_t nsim_dev_trap_fa_cookie_write(struct file *file,
                                                   const char __user *data,
                                                   size_t count, loff_t *ppos)
      {
              struct nsim_dev *nsim_dev = file->private_data;
              struct flow_action_cookie *fa_cookie;
              size_t cookie_len;
              ssize_t ret;
              char *buf;
      
              if (*ppos != 0)
                      return -EINVAL;
              cookie_len = (count - 1) / 2;
              if ((count - 1) % 2)
                      return -EINVAL;
              buf = kmalloc(count, GFP_KERNEL | __GFP_NOWARN);
              if (!buf)
                      return -ENOMEM;
      
              ret = simple_write_to_buffer(buf, count, ppos, data, count);
              if (ret < 0)
                      goto free_buf;
      
              fa_cookie = kmalloc(sizeof(*fa_cookie) + cookie_len,
                                  GFP_KERNEL | __GFP_NOWARN);
              if (!fa_cookie) {
                      ret = -ENOMEM;
                      goto free_buf;
              }
      
              fa_cookie->cookie_len = cookie_len;
              ret = hex2bin(fa_cookie->cookie, buf, cookie_len);
              if (ret)
                      goto free_fa_cookie;
              kfree(buf);
      
              spin_lock(&nsim_dev->fa_cookie_lock);
              kfree(nsim_dev->fa_cookie);
              nsim_dev->fa_cookie = fa_cookie;
              spin_unlock(&nsim_dev->fa_cookie_lock);
      
              return count;
      
      free_fa_cookie:
              kfree(fa_cookie);
      free_buf:
              kfree(buf);
              return ret;
      }
      
      static const struct file_operations nsim_dev_trap_fa_cookie_fops = {
              .open = simple_open,
              .read = nsim_dev_trap_fa_cookie_read,
              .write = nsim_dev_trap_fa_cookie_write,
              .llseek = generic_file_llseek,
      };
      
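/* Create the per-device debugfs directory and its knobs. */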
      static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev)
      {
              char dev_ddir_name[sizeof(DRV_NAME) + 10];
      
              sprintf(dev_ddir_name, DRV_NAME "%u", nsim_dev->nsim_bus_dev->dev.id);
              nsim_dev->ddir = debugfs_create_dir(dev_ddir_name, nsim_dev_ddir);
              if (IS_ERR(nsim_dev->ddir))
                      return PTR_ERR(nsim_dev->ddir);
              nsim_dev->ports_ddir = debugfs_create_dir("ports", nsim_dev->ddir);
              if (IS_ERR(nsim_dev->ports_ddir))
                      return PTR_ERR(nsim_dev->ports_ddir);
              debugfs_create_bool("fw_update_status", 0600, nsim_dev->ddir,
                                  &nsim_dev->fw_update_status);
              debugfs_create_u32("max_macs", 0600, nsim_dev->ddir,
                                 &nsim_dev->max_macs);
              debugfs_create_bool("test1", 0600, nsim_dev->ddir,
                                  &nsim_dev->test1);
              nsim_dev->take_snapshot = debugfs_create_file("take_snapshot",
                                                            0200,
                                                            nsim_dev->ddir,
                                                            nsim_dev,
                                                      &nsim_dev_take_snapshot_fops);
              debugfs_create_bool("dont_allow_reload", 0600, nsim_dev->ddir,
                                  &nsim_dev->dont_allow_reload);
              debugfs_create_bool("fail_reload", 0600, nsim_dev->ddir,
                                  &nsim_dev->fail_reload);
              debugfs_create_file("trap_flow_action_cookie", 0600, nsim_dev->ddir,
                                  nsim_dev, &nsim_dev_trap_fa_cookie_fops);
              debugfs_create_bool("fail_trap_group_set", 0600,
                                  nsim_dev->ddir,
                                  &nsim_dev->fail_trap_group_set);
              debugfs_create_bool("fail_trap_policer_set", 0600,
                                  nsim_dev->ddir,
                                  &nsim_dev->fail_trap_policer_set);
              debugfs_create_bool("fail_trap_policer_counter_get", 0600,
                                  nsim_dev->ddir,
                                  &nsim_dev->fail_trap_policer_counter_get);
              return 0;
      }
      
      static void nsim_dev_debugfs_exit(struct nsim_dev *nsim_dev)
      {
              debugfs_remove_recursive(nsim_dev->ports_ddir);
              debugfs_remove_recursive(nsim_dev->ddir);
      }
      
      static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev,
                                            struct nsim_dev_port *nsim_dev_port)
      {
              char port_ddir_name[16];
              char dev_link_name[32];
      
              sprintf(port_ddir_name, "%u", nsim_dev_port->port_index);
              nsim_dev_port->ddir = debugfs_create_dir(port_ddir_name,
                                                       nsim_dev->ports_ddir);
              if (IS_ERR(nsim_dev_port->ddir))
                      return PTR_ERR(nsim_dev_port->ddir);
      
              sprintf(dev_link_name, "../../../" DRV_NAME "%u",
                      nsim_dev->nsim_bus_dev->dev.id);
              debugfs_create_symlink("dev", nsim_dev_port->ddir, dev_link_name);
      
              return 0;
      }
      
      static void nsim_dev_port_debugfs_exit(struct nsim_dev_port *nsim_dev_port)
      {
              debugfs_remove_recursive(nsim_dev_port->ddir);
      }
      
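/* Register the simulated IPv4 and IPv6 FIB and FIB-rules resources with
 * devlink, each with an effectively unlimited maximum size.
 */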
      static int nsim_dev_resources_register(struct devlink *devlink)
      {
              struct devlink_resource_size_params params = {
                      .size_max = (u64)-1,
                      .size_granularity = 1,
                      .unit = DEVLINK_RESOURCE_UNIT_ENTRY
              };
              int err;
      
              /* Resources for IPv4 */
              err = devlink_resource_register(devlink, "IPv4", (u64)-1,
                                              NSIM_RESOURCE_IPV4,
                                              DEVLINK_RESOURCE_ID_PARENT_TOP,
                                              &params);
              if (err) {
                      pr_err("Failed to register IPv4 top resource\n");
                      goto out;
              }
      
              err = devlink_resource_register(devlink, "fib", (u64)-1,
                                              NSIM_RESOURCE_IPV4_FIB,
                                              NSIM_RESOURCE_IPV4, &params);
              if (err) {
                      pr_err("Failed to register IPv4 FIB resource\n");
                      return err;
              }
      
              err = devlink_resource_register(devlink, "fib-rules", (u64)-1,
                                              NSIM_RESOURCE_IPV4_FIB_RULES,
                                              NSIM_RESOURCE_IPV4, &params);
              if (err) {
                      pr_err("Failed to register IPv4 FIB rules resource\n");
                      return err;
              }
      
              /* Resources for IPv6 */
              err = devlink_resource_register(devlink, "IPv6", (u64)-1,
                                              NSIM_RESOURCE_IPV6,
                                              DEVLINK_RESOURCE_ID_PARENT_TOP,
                                              &params);
              if (err) {
                      pr_err("Failed to register IPv6 top resource\n");
                      goto out;
              }
      
              err = devlink_resource_register(devlink, "fib", (u64)-1,
                                              NSIM_RESOURCE_IPV6_FIB,
                                              NSIM_RESOURCE_IPV6, &params);
              if (err) {
                      pr_err("Failed to register IPv6 FIB resource\n");
                      return err;
              }
      
              err = devlink_resource_register(devlink, "fib-rules", (u64)-1,
                                              NSIM_RESOURCE_IPV6_FIB_RULES,
                                              NSIM_RESOURCE_IPV6, &params);
              if (err) {
                      pr_err("Failed to register IPv6 FIB rules resource\n");
                      return err;
              }
      
      out:
              return err;
      }
      
      enum nsim_devlink_param_id {
              NSIM_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
              NSIM_DEVLINK_PARAM_ID_TEST1,
      };
      
      static const struct devlink_param nsim_devlink_params[] = {
              DEVLINK_PARAM_GENERIC(MAX_MACS,
                                    BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
                                    NULL, NULL, NULL),
              DEVLINK_PARAM_DRIVER(NSIM_DEVLINK_PARAM_ID_TEST1,
                                   "test1", DEVLINK_PARAM_TYPE_BOOL,
                                   BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
                                   NULL, NULL, NULL),
      };
      
      static void nsim_devlink_set_params_init_values(struct nsim_dev *nsim_dev,
                                                      struct devlink *devlink)
      {
              union devlink_param_value value;
      
              value.vu32 = nsim_dev->max_macs;
              devlink_param_driverinit_value_set(devlink,
                                                 DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
                                                 value);
              value.vbool = nsim_dev->test1;
              devlink_param_driverinit_value_set(devlink,
                                                 NSIM_DEVLINK_PARAM_ID_TEST1,
                                                 value);
      }
      
      static void nsim_devlink_param_load_driverinit_values(struct devlink *devlink)
      {
              struct nsim_dev *nsim_dev = devlink_priv(devlink);
              union devlink_param_value saved_value;
              int err;
      
              err = devlink_param_driverinit_value_get(devlink,
                                                       DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
                                                       &saved_value);
              if (!err)
                      nsim_dev->max_macs = saved_value.vu32;
              err = devlink_param_driverinit_value_get(devlink,
                                                       NSIM_DEVLINK_PARAM_ID_TEST1,
                                                       &saved_value);
              if (!err)
                      nsim_dev->test1 = saved_value.vbool;
      }
      
      #define NSIM_DEV_DUMMY_REGION_SNAPSHOT_MAX 16
      
      static const struct devlink_region_ops dummy_region_ops = {
              .name = "dummy",
              .destructor = &kfree,
              .snapshot = nsim_dev_take_snapshot,
      };
      
      static int nsim_dev_dummy_region_init(struct nsim_dev *nsim_dev,
                                            struct devlink *devlink)
      {
              nsim_dev->dummy_region =
                      devlink_region_create(devlink, &dummy_region_ops,
                                            NSIM_DEV_DUMMY_REGION_SNAPSHOT_MAX,
                                            NSIM_DEV_DUMMY_REGION_SIZE);
              return PTR_ERR_OR_ZERO(nsim_dev->dummy_region);
      }
      
      static void nsim_dev_dummy_region_exit(struct nsim_dev *nsim_dev)
      {
              devlink_region_destroy(nsim_dev->dummy_region);
      }
      
      struct nsim_trap_item {
              void *trap_ctx;
              enum devlink_trap_action action;
      };
      
      struct nsim_trap_data {
              struct delayed_work trap_report_dw;
              struct nsim_trap_item *trap_items_arr;
              u64 *trap_policers_cnt_arr;
              struct nsim_dev *nsim_dev;
              spinlock_t trap_lock;        /* Protects trap_items_arr */
      };
      
      /* All driver-specific traps must be documented in
       * Documentation/networking/devlink/netdevsim.rst
       */
      enum {
              NSIM_TRAP_ID_BASE = DEVLINK_TRAP_GENERIC_ID_MAX,
              NSIM_TRAP_ID_FID_MISS,
      };
      
      #define NSIM_TRAP_NAME_FID_MISS "fid_miss"
      
      #define NSIM_TRAP_METADATA DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT
      
      #define NSIM_TRAP_DROP(_id, _group_id)                                              \
              DEVLINK_TRAP_GENERIC(DROP, DROP, _id,                                      \
                                   DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id,              \
                                   NSIM_TRAP_METADATA)
      #define NSIM_TRAP_DROP_EXT(_id, _group_id, _metadata)                              \
              DEVLINK_TRAP_GENERIC(DROP, DROP, _id,                                      \
                                   DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id,              \
                                   NSIM_TRAP_METADATA | (_metadata))
      #define NSIM_TRAP_EXCEPTION(_id, _group_id)                                      \
              DEVLINK_TRAP_GENERIC(EXCEPTION, TRAP, _id,                              \
                                   DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id,              \
                                   NSIM_TRAP_METADATA)
      #define NSIM_TRAP_CONTROL(_id, _group_id, _action)                              \
              DEVLINK_TRAP_GENERIC(CONTROL, _action, _id,                              \
                                   DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id,              \
                                   NSIM_TRAP_METADATA)
      #define NSIM_TRAP_DRIVER_EXCEPTION(_id, _group_id)                              \
              DEVLINK_TRAP_DRIVER(EXCEPTION, TRAP, NSIM_TRAP_ID_##_id,              \
                                  NSIM_TRAP_NAME_##_id,                              \
                                  DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id,              \
                                  NSIM_TRAP_METADATA)
      
      #define NSIM_DEV_TRAP_POLICER_MIN_RATE        1
      #define NSIM_DEV_TRAP_POLICER_MAX_RATE        8000
      #define NSIM_DEV_TRAP_POLICER_MIN_BURST        8
      #define NSIM_DEV_TRAP_POLICER_MAX_BURST        65536
      
      #define NSIM_TRAP_POLICER(_id, _rate, _burst)                                      \
              DEVLINK_TRAP_POLICER(_id, _rate, _burst,                              \
                                   NSIM_DEV_TRAP_POLICER_MAX_RATE,                      \
                                   NSIM_DEV_TRAP_POLICER_MIN_RATE,                      \
                                   NSIM_DEV_TRAP_POLICER_MAX_BURST,                      \
                                   NSIM_DEV_TRAP_POLICER_MIN_BURST)
      
      static const struct devlink_trap_policer nsim_trap_policers_arr[] = {
              NSIM_TRAP_POLICER(1, 1000, 128),
              NSIM_TRAP_POLICER(2, 2000, 256),
              NSIM_TRAP_POLICER(3, 3000, 512),
      };
      
      static const struct devlink_trap_group nsim_trap_groups_arr[] = {
              DEVLINK_TRAP_GROUP_GENERIC(L2_DROPS, 0),
              DEVLINK_TRAP_GROUP_GENERIC(L3_DROPS, 1),
              DEVLINK_TRAP_GROUP_GENERIC(L3_EXCEPTIONS, 1),
              DEVLINK_TRAP_GROUP_GENERIC(BUFFER_DROPS, 2),
              DEVLINK_TRAP_GROUP_GENERIC(ACL_DROPS, 3),
              DEVLINK_TRAP_GROUP_GENERIC(MC_SNOOPING, 3),
      };
      
      static const struct devlink_trap nsim_traps_arr[] = {
              NSIM_TRAP_DROP(SMAC_MC, L2_DROPS),
              NSIM_TRAP_DROP(VLAN_TAG_MISMATCH, L2_DROPS),
              NSIM_TRAP_DROP(INGRESS_VLAN_FILTER, L2_DROPS),
              NSIM_TRAP_DROP(INGRESS_STP_FILTER, L2_DROPS),
              NSIM_TRAP_DROP(EMPTY_TX_LIST, L2_DROPS),
              NSIM_TRAP_DROP(PORT_LOOPBACK_FILTER, L2_DROPS),
              NSIM_TRAP_DRIVER_EXCEPTION(FID_MISS, L2_DROPS),
              NSIM_TRAP_DROP(BLACKHOLE_ROUTE, L3_DROPS),
              NSIM_TRAP_EXCEPTION(TTL_ERROR, L3_EXCEPTIONS),
              NSIM_TRAP_DROP(TAIL_DROP, BUFFER_DROPS),
              NSIM_TRAP_DROP_EXT(INGRESS_FLOW_ACTION_DROP, ACL_DROPS,
                                 DEVLINK_TRAP_METADATA_TYPE_F_FA_COOKIE),
              NSIM_TRAP_DROP_EXT(EGRESS_FLOW_ACTION_DROP, ACL_DROPS,
                                 DEVLINK_TRAP_METADATA_TYPE_F_FA_COOKIE),
              NSIM_TRAP_CONTROL(IGMP_QUERY, MC_SNOOPING, MIRROR),
              NSIM_TRAP_CONTROL(IGMP_V1_REPORT, MC_SNOOPING, TRAP),
      };
      
      #define NSIM_TRAP_L4_DATA_LEN 100
      
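/* Build a dummy Ethernet/IPv4/UDP packet with random MAC addresses and
 * random UDP ports to be reported as the payload of a trapped packet.
 */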
      static struct sk_buff *nsim_dev_trap_skb_build(void)
      {
              int tot_len, data_len = NSIM_TRAP_L4_DATA_LEN;
              struct sk_buff *skb;
              struct udphdr *udph;
              struct ethhdr *eth;
              struct iphdr *iph;
      
              skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
              if (!skb)
                      return NULL;
              tot_len = sizeof(struct iphdr) + sizeof(struct udphdr) + data_len;
      
              skb_reset_mac_header(skb);
              eth = skb_put(skb, sizeof(struct ethhdr));
              eth_random_addr(eth->h_dest);
              eth_random_addr(eth->h_source);
              eth->h_proto = htons(ETH_P_IP);
              skb->protocol = htons(ETH_P_IP);
      
              skb_set_network_header(skb, skb->len);
              iph = skb_put(skb, sizeof(struct iphdr));
              iph->protocol = IPPROTO_UDP;
              iph->saddr = in_aton("192.0.2.1");
              iph->daddr = in_aton("198.51.100.1");
              iph->version = 0x4;
              iph->frag_off = 0;
              iph->ihl = 0x5;
              iph->tot_len = htons(tot_len);
              iph->ttl = 100;
              iph->check = 0;
              iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
      
              skb_set_transport_header(skb, skb->len);
              udph = skb_put_zero(skb, sizeof(struct udphdr) + data_len);
              get_random_bytes(&udph->source, sizeof(u16));
              get_random_bytes(&udph->dest, sizeof(u16));
              udph->len = htons(sizeof(struct udphdr) + data_len);
      
              return skb;
      }
      
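/* For the given port, report one dummy packet for every trap whose action
 * is not "drop", attaching the user-configured flow action cookie when the
 * trap supports that metadata.
 */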
      static void nsim_dev_trap_report(struct nsim_dev_port *nsim_dev_port)
      {
              struct nsim_dev *nsim_dev = nsim_dev_port->ns->nsim_dev;
              struct devlink *devlink = priv_to_devlink(nsim_dev);
              struct nsim_trap_data *nsim_trap_data;
              int i;
      
              nsim_trap_data = nsim_dev->trap_data;
      
              spin_lock(&nsim_trap_data->trap_lock);
              for (i = 0; i < ARRAY_SIZE(nsim_traps_arr); i++) {
                      struct flow_action_cookie *fa_cookie = NULL;
                      struct nsim_trap_item *nsim_trap_item;
                      struct sk_buff *skb;
                      bool has_fa_cookie;
      
                      has_fa_cookie = nsim_traps_arr[i].metadata_cap &
                                      DEVLINK_TRAP_METADATA_TYPE_F_FA_COOKIE;
      
                      nsim_trap_item = &nsim_trap_data->trap_items_arr[i];
                      if (nsim_trap_item->action == DEVLINK_TRAP_ACTION_DROP)
                              continue;
      
                      skb = nsim_dev_trap_skb_build();
                      if (!skb)
                              continue;
                      skb->dev = nsim_dev_port->ns->netdev;
      
                      /* Trapped packets are usually passed to devlink in softIRQ,
                       * but in this case they are generated in a workqueue. Disable
                       * softIRQs to prevent lockdep from complaining about
		 * "inconsistent lock state".
                       */
      
                      spin_lock_bh(&nsim_dev->fa_cookie_lock);
                      fa_cookie = has_fa_cookie ? nsim_dev->fa_cookie : NULL;
                      devlink_trap_report(devlink, skb, nsim_trap_item->trap_ctx,
                                          &nsim_dev_port->devlink_port, fa_cookie);
                      spin_unlock_bh(&nsim_dev->fa_cookie_lock);
                      consume_skb(skb);
              }
              spin_unlock(&nsim_trap_data->trap_lock);
      }
      
      #define NSIM_TRAP_REPORT_INTERVAL_MS        100
      
      static void nsim_dev_trap_report_work(struct work_struct *work)
      {
              struct nsim_trap_data *nsim_trap_data;
              struct nsim_dev_port *nsim_dev_port;
              struct nsim_dev *nsim_dev;
      
              nsim_trap_data = container_of(work, struct nsim_trap_data,
                                            trap_report_dw.work);
              nsim_dev = nsim_trap_data->nsim_dev;
      
              /* For each running port and enabled packet trap, generate a UDP
               * packet with a random 5-tuple and report it.
               */
              mutex_lock(&nsim_dev->port_list_lock);
              list_for_each_entry(nsim_dev_port, &nsim_dev->port_list, list) {
                      if (!netif_running(nsim_dev_port->ns->netdev))
                              continue;
      
                      nsim_dev_trap_report(nsim_dev_port);
              }
              mutex_unlock(&nsim_dev->port_list_lock);
      
              schedule_delayed_work(&nsim_dev->trap_data->trap_report_dw,
                                    msecs_to_jiffies(NSIM_TRAP_REPORT_INTERVAL_MS));
      }
      
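/* Allocate the per-device trap state, register the trap policers, trap
 * groups and traps with devlink and kick off the periodic trap reporting
 * work.
 */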
      static int nsim_dev_traps_init(struct devlink *devlink)
      {
              size_t policers_count = ARRAY_SIZE(nsim_trap_policers_arr);
              struct nsim_dev *nsim_dev = devlink_priv(devlink);
              struct nsim_trap_data *nsim_trap_data;
              int err;
      
              nsim_trap_data = kzalloc(sizeof(*nsim_trap_data), GFP_KERNEL);
              if (!nsim_trap_data)
                      return -ENOMEM;
      
              nsim_trap_data->trap_items_arr = kcalloc(ARRAY_SIZE(nsim_traps_arr),
                                                       sizeof(struct nsim_trap_item),
                                                       GFP_KERNEL);
              if (!nsim_trap_data->trap_items_arr) {
                      err = -ENOMEM;
                      goto err_trap_data_free;
              }
      
              nsim_trap_data->trap_policers_cnt_arr = kcalloc(policers_count,
                                                              sizeof(u64),
                                                              GFP_KERNEL);
              if (!nsim_trap_data->trap_policers_cnt_arr) {
                      err = -ENOMEM;
                      goto err_trap_items_free;
              }
      
	/* The lock protects the action state of the registered traps. The
	 * action is written by the user and read in the delayed work when
	 * iterating over all the traps.
	 */
              spin_lock_init(&nsim_trap_data->trap_lock);
              nsim_trap_data->nsim_dev = nsim_dev;
              nsim_dev->trap_data = nsim_trap_data;
      
              err = devlink_trap_policers_register(devlink, nsim_trap_policers_arr,
                                                   policers_count);
              if (err)
                      goto err_trap_policers_cnt_free;
      
              err = devlink_trap_groups_register(devlink, nsim_trap_groups_arr,
                                                 ARRAY_SIZE(nsim_trap_groups_arr));
              if (err)
                      goto err_trap_policers_unregister;
      
              err = devlink_traps_register(devlink, nsim_traps_arr,
                                           ARRAY_SIZE(nsim_traps_arr), NULL);
              if (err)
                      goto err_trap_groups_unregister;
      
              INIT_DELAYED_WORK(&nsim_dev->trap_data->trap_report_dw,
                                nsim_dev_trap_report_work);
              schedule_delayed_work(&nsim_dev->trap_data->trap_report_dw,
                                    msecs_to_jiffies(NSIM_TRAP_REPORT_INTERVAL_MS));
      
              return 0;
      
      err_trap_groups_unregister:
              devlink_trap_groups_unregister(devlink, nsim_trap_groups_arr,
                                             ARRAY_SIZE(nsim_trap_groups_arr));
      err_trap_policers_unregister:
              devlink_trap_policers_unregister(devlink, nsim_trap_policers_arr,
                                               ARRAY_SIZE(nsim_trap_policers_arr));
      err_trap_policers_cnt_free:
              kfree(nsim_trap_data->trap_policers_cnt_arr);
      err_trap_items_free:
              kfree(nsim_trap_data->trap_items_arr);
      err_trap_data_free:
              kfree(nsim_trap_data);
              return err;
      }
      
      static void nsim_dev_traps_exit(struct devlink *devlink)
      {
              struct nsim_dev *nsim_dev = devlink_priv(devlink);
      
              cancel_delayed_work_sync(&nsim_dev->trap_data->trap_report_dw);
              devlink_traps_unregister(devlink, nsim_traps_arr,
                                       ARRAY_SIZE(nsim_traps_arr));
              devlink_trap_groups_unregister(devlink, nsim_trap_groups_arr,
                                             ARRAY_SIZE(nsim_trap_groups_arr));
              devlink_trap_policers_unregister(devlink, nsim_trap_policers_arr,
                                               ARRAY_SIZE(nsim_trap_policers_arr));
              kfree(nsim_dev->trap_data->trap_policers_cnt_arr);
              kfree(nsim_dev->trap_data->trap_items_arr);
              kfree(nsim_dev->trap_data);
      }
      
      static int nsim_dev_reload_create(struct nsim_dev *nsim_dev,
                                        struct netlink_ext_ack *extack);
      static void nsim_dev_reload_destroy(struct nsim_dev *nsim_dev);
      
      static int nsim_dev_reload_down(struct devlink *devlink, bool netns_change,
                                      struct netlink_ext_ack *extack)
      {
              struct nsim_dev *nsim_dev = devlink_priv(devlink);
      
              if (nsim_dev->dont_allow_reload) {
		/* For testing purposes, the user set the debugfs
		 * dont_allow_reload value to true, so forbid the reload.
		 */
                      NL_SET_ERR_MSG_MOD(extack, "User forbid the reload for testing purposes");
                      return -EOPNOTSUPP;
              }
      
              nsim_dev_reload_destroy(nsim_dev);
              return 0;
      }
      
      static int nsim_dev_reload_up(struct devlink *devlink,
                                    struct netlink_ext_ack *extack)
      {
              struct nsim_dev *nsim_dev = devlink_priv(devlink);
      
              if (nsim_dev->fail_reload) {
		/* For testing purposes, the user set the debugfs fail_reload
		 * value to true, so fail right away.
		 */
                      NL_SET_ERR_MSG_MOD(extack, "User setup the reload to fail for testing purposes");
                      return -EINVAL;
              }
      
              return nsim_dev_reload_create(nsim_dev, extack);
      }
      
      static int nsim_dev_info_get(struct devlink *devlink,
                                   struct devlink_info_req *req,
                                   struct netlink_ext_ack *extack)
      {
	return devlink_info_driver_name_put(req, DRV_NAME);
      }
      
      #define NSIM_DEV_FLASH_SIZE 500000
      #define NSIM_DEV_FLASH_CHUNK_SIZE 1000
      #define NSIM_DEV_FLASH_CHUNK_TIME_MS 10
      
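/* Simulate a firmware flash: sleep for each chunk and, if fw_update_status
 * is enabled via debugfs, emit devlink status notifications describing the
 * progress of the update.
 */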
      static int nsim_dev_flash_update(struct devlink *devlink, const char *file_name,
                                       const char *component,
                                       struct netlink_ext_ack *extack)
      {
              struct nsim_dev *nsim_dev = devlink_priv(devlink);
              int i;
      
              if (nsim_dev->fw_update_status) {
                      devlink_flash_update_begin_notify(devlink);
                      devlink_flash_update_status_notify(devlink,
                                                         "Preparing to flash",
                                                         component, 0, 0);
              }
      
              for (i = 0; i < NSIM_DEV_FLASH_SIZE / NSIM_DEV_FLASH_CHUNK_SIZE; i++) {
                      if (nsim_dev->fw_update_status)
                              devlink_flash_update_status_notify(devlink, "Flashing",
                                                                 component,
                                                                 i * NSIM_DEV_FLASH_CHUNK_SIZE,
                                                                 NSIM_DEV_FLASH_SIZE);
                      msleep(NSIM_DEV_FLASH_CHUNK_TIME_MS);
              }
      
              if (nsim_dev->fw_update_status) {
                      devlink_flash_update_status_notify(devlink, "Flashing",
                                                         component,
                                                         NSIM_DEV_FLASH_SIZE,
                                                         NSIM_DEV_FLASH_SIZE);
                      devlink_flash_update_status_notify(devlink, "Flashing done",
                                                         component, 0, 0);
                      devlink_flash_update_end_notify(devlink);
              }
      
              return 0;
      }
      
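/* Map a devlink trap identifier to the driver's per-trap state. */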
      static struct nsim_trap_item *
      nsim_dev_trap_item_lookup(struct nsim_dev *nsim_dev, u16 trap_id)
      {
              struct nsim_trap_data *nsim_trap_data = nsim_dev->trap_data;
              int i;
      
              for (i = 0; i < ARRAY_SIZE(nsim_traps_arr); i++) {
                      if (nsim_traps_arr[i].id == trap_id)
                              return &nsim_trap_data->trap_items_arr[i];
              }
      
              return NULL;
      }
      
      static int nsim_dev_devlink_trap_init(struct devlink *devlink,
                                            const struct devlink_trap *trap,
                                            void *trap_ctx)
      {
              struct nsim_dev *nsim_dev = devlink_priv(devlink);
              struct nsim_trap_item *nsim_trap_item;
      
              nsim_trap_item = nsim_dev_trap_item_lookup(nsim_dev, trap->id);
              if (WARN_ON(!nsim_trap_item))
                      return -ENOENT;
      
              nsim_trap_item->trap_ctx = trap_ctx;
              nsim_trap_item->action = trap->init_action;
      
              return 0;
      }
      
      static int
      nsim_dev_devlink_trap_action_set(struct devlink *devlink,
                                       const struct devlink_trap *trap,
                                       enum devlink_trap_action action)
      {
              struct nsim_dev *nsim_dev = devlink_priv(devlink);
              struct nsim_trap_item *nsim_trap_item;
      
              nsim_trap_item = nsim_dev_trap_item_lookup(nsim_dev, trap->id);
              if (WARN_ON(!nsim_trap_item))
                      return -ENOENT;
      
              spin_lock(&nsim_dev->trap_data->trap_lock);
              nsim_trap_item->action = action;
              spin_unlock(&nsim_dev->trap_data->trap_lock);
      
              return 0;
      }
      
      static int
      nsim_dev_devlink_trap_group_set(struct devlink *devlink,
                                      const struct devlink_trap_group *group,
                                      const struct devlink_trap_policer *policer)
      {
              struct nsim_dev *nsim_dev = devlink_priv(devlink);
      
              if (nsim_dev->fail_trap_group_set)
                      return -EINVAL;
      
              return 0;
      }
      
      static int
      nsim_dev_devlink_trap_policer_set(struct devlink *devlink,
                                        const struct devlink_trap_policer *policer,
                                        u64 rate, u64 burst,
                                        struct netlink_ext_ack *extack)
      {
              struct nsim_dev *nsim_dev = devlink_priv(devlink);
      
              if (nsim_dev->fail_trap_policer_set) {
                      NL_SET_ERR_MSG_MOD(extack, "User setup the operation to fail for testing purposes");
                      return -EINVAL;
              }
      
              return 0;
      }
      
      static int
      nsim_dev_devlink_trap_policer_counter_get(struct devlink *devlink,
                                                const struct devlink_trap_policer *policer,
                                                u64 *p_drops)
      {
	struct nsim_dev *nsim_dev = devlink_priv(devlink);
	u64 *cnt;

	if (nsim_dev->fail_trap_policer_counter_get)
		return -EINVAL;

	cnt = &nsim_dev->trap_data->trap_policers_cnt_arr[policer->id - 1];
	*p_drops = (*cnt)++;

	return 0;
      }
      
      static const struct devlink_ops nsim_dev_devlink_ops = {
              .reload_down = nsim_dev_reload_down,
              .reload_up = nsim_dev_reload_up,
              .info_get = nsim_dev_info_get,
              .flash_update = nsim_dev_flash_update,
              .trap_init = nsim_dev_devlink_trap_init,
              .trap_action_set = nsim_dev_devlink_trap_action_set,
              .trap_group_set = nsim_dev_devlink_trap_group_set,
              .trap_policer_set = nsim_dev_devlink_trap_policer_set,
              .trap_policer_counter_get = nsim_dev_devlink_trap_policer_counter_get,
      };
      
      #define NSIM_DEV_MAX_MACS_DEFAULT 32
      #define NSIM_DEV_TEST1_DEFAULT true
      
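/* Register a devlink port for the given index, create its debugfs
 * directory and instantiate the backing netdevsim netdev.
 */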
      static int __nsim_dev_port_add(struct nsim_dev *nsim_dev,
                                     unsigned int port_index)
      {
              struct nsim_dev_port *nsim_dev_port;
              struct devlink_port *devlink_port;
              int err;
      
              nsim_dev_port = kzalloc(sizeof(*nsim_dev_port), GFP_KERNEL);
              if (!nsim_dev_port)
                      return -ENOMEM;
              nsim_dev_port->port_index = port_index;
      
              devlink_port = &nsim_dev_port->devlink_port;
              devlink_port_attrs_set(devlink_port, DEVLINK_PORT_FLAVOUR_PHYSICAL,
                                     port_index + 1, 0, 0,
                                     nsim_dev->switch_id.id,
                                     nsim_dev->switch_id.id_len);
              err = devlink_port_register(priv_to_devlink(nsim_dev), devlink_port,
                                          port_index);
              if (err)
                      goto err_port_free;
      
              err = nsim_dev_port_debugfs_init(nsim_dev, nsim_dev_port);
              if (err)
                      goto err_dl_port_unregister;
      
              nsim_dev_port->ns = nsim_create(nsim_dev, nsim_dev_port);
              if (IS_ERR(nsim_dev_port->ns)) {
                      err = PTR_ERR(nsim_dev_port->ns);
                      goto err_port_debugfs_exit;
              }
      
              devlink_port_type_eth_set(devlink_port, nsim_dev_port->ns->netdev);
              list_add(&nsim_dev_port->list, &nsim_dev->port_list);
      
              return 0;
      
      err_port_debugfs_exit:
              nsim_dev_port_debugfs_exit(nsim_dev_port);
      err_dl_port_unregister:
              devlink_port_unregister(devlink_port);
      err_port_free:
              kfree(nsim_dev_port);
              return err;
      }
      
      static void __nsim_dev_port_del(struct nsim_dev_port *nsim_dev_port)
      {
              struct devlink_port *devlink_port = &nsim_dev_port->devlink_port;
      
              list_del(&nsim_dev_port->list);
              devlink_port_type_clear(devlink_port);
              nsim_destroy(nsim_dev_port->ns);
              nsim_dev_port_debugfs_exit(nsim_dev_port);
              devlink_port_unregister(devlink_port);
              kfree(nsim_dev_port);
      }
      
      static void nsim_dev_port_del_all(struct nsim_dev *nsim_dev)
      {
              struct nsim_dev_port *nsim_dev_port, *tmp;
      
              mutex_lock(&nsim_dev->port_list_lock);
              list_for_each_entry_safe(nsim_dev_port, tmp,
                                       &nsim_dev->port_list, list)
                      __nsim_dev_port_del(nsim_dev_port);
              mutex_unlock(&nsim_dev->port_list_lock);
      }
      
      static int nsim_dev_port_add_all(struct nsim_dev *nsim_dev,
                                       unsigned int port_count)
      {
              int i, err;
      
              for (i = 0; i < port_count; i++) {
                      err = __nsim_dev_port_add(nsim_dev, i);
                      if (err)
                              goto err_port_del_all;
              }
              return 0;
      
      err_port_del_all:
              nsim_dev_port_del_all(nsim_dev);
              return err;
      }
      
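/* Re-create the device state (FIB data, dummy region, traps, health
 * reporters and ports) when the device is brought back up by a devlink
 * reload.
 */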
      static int nsim_dev_reload_create(struct nsim_dev *nsim_dev,
                                        struct netlink_ext_ack *extack)
      {
              struct nsim_bus_dev *nsim_bus_dev = nsim_dev->nsim_bus_dev;
              struct devlink *devlink;
              int err;
      
              devlink = priv_to_devlink(nsim_dev);
              nsim_dev = devlink_priv(devlink);
              INIT_LIST_HEAD(&nsim_dev->port_list);
              mutex_init(&nsim_dev->port_list_lock);
              nsim_dev->fw_update_status = true;
      
              nsim_dev->fib_data = nsim_fib_create(devlink, extack);
              if (IS_ERR(nsim_dev->fib_data))
                      return PTR_ERR(nsim_dev->fib_data);
      
              nsim_devlink_param_load_driverinit_values(devlink);
      
              err = nsim_dev_dummy_region_init(nsim_dev, devlink);
              if (err)
                      goto err_fib_destroy;
      
              err = nsim_dev_traps_init(devlink);
              if (err)
                      goto err_dummy_region_exit;
      
              err = nsim_dev_health_init(nsim_dev, devlink);
              if (err)
                      goto err_traps_exit;
      
              err = nsim_dev_port_add_all(nsim_dev, nsim_bus_dev->port_count);
              if (err)
                      goto err_health_exit;
      
              nsim_dev->take_snapshot = debugfs_create_file("take_snapshot",
                                                            0200,
                                                            nsim_dev->ddir,
                                                            nsim_dev,
                                                      &nsim_dev_take_snapshot_fops);
              return 0;
      
      err_health_exit:
              nsim_dev_health_exit(nsim_dev);
      err_traps_exit:
              nsim_dev_traps_exit(devlink);
      err_dummy_region_exit:
              nsim_dev_dummy_region_exit(nsim_dev);
      err_fib_destroy:
              nsim_fib_destroy(devlink, nsim_dev->fib_data);
              return err;
      }
      
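/* Allocate and register the devlink instance for a netdevsim bus device
 * and bring up all of its sub-objects: resources, FIB data, parameters,
 * dummy region, traps, debugfs, health reporters, BPF state and ports.
 */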
      int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev)
      {
              struct nsim_dev *nsim_dev;
              struct devlink *devlink;
              int err;
      
              devlink = devlink_alloc(&nsim_dev_devlink_ops, sizeof(*nsim_dev));
              if (!devlink)
                      return -ENOMEM;
              devlink_net_set(devlink, nsim_bus_dev->initial_net);
              nsim_dev = devlink_priv(devlink);
              nsim_dev->nsim_bus_dev = nsim_bus_dev;
              nsim_dev->switch_id.id_len = sizeof(nsim_dev->switch_id.id);
              get_random_bytes(nsim_dev->switch_id.id, nsim_dev->switch_id.id_len);
              INIT_LIST_HEAD(&nsim_dev->port_list);
              mutex_init(&nsim_dev->port_list_lock);
              nsim_dev->fw_update_status = true;
              nsim_dev->max_macs = NSIM_DEV_MAX_MACS_DEFAULT;
              nsim_dev->test1 = NSIM_DEV_TEST1_DEFAULT;
              spin_lock_init(&nsim_dev->fa_cookie_lock);
      
              dev_set_drvdata(&nsim_bus_dev->dev, nsim_dev);
      
              err = nsim_dev_resources_register(devlink);
              if (err)
                      goto err_devlink_free;
      
              nsim_dev->fib_data = nsim_fib_create(devlink, NULL);
              if (IS_ERR(nsim_dev->fib_data)) {
                      err = PTR_ERR(nsim_dev->fib_data);
                      goto err_resources_unregister;
              }
      
              err = devlink_register(devlink, &nsim_bus_dev->dev);
              if (err)
                      goto err_fib_destroy;
      
              err = devlink_params_register(devlink, nsim_devlink_params,
                                            ARRAY_SIZE(nsim_devlink_params));
              if (err)
                      goto err_dl_unregister;
              nsim_devlink_set_params_init_values(nsim_dev, devlink);
      
              err = nsim_dev_dummy_region_init(nsim_dev, devlink);
              if (err)
                      goto err_params_unregister;
      
              err = nsim_dev_traps_init(devlink);
              if (err)
                      goto err_dummy_region_exit;
      
              err = nsim_dev_debugfs_init(nsim_dev);
              if (err)
                      goto err_traps_exit;
      
              err = nsim_dev_health_init(nsim_dev, devlink);
              if (err)
                      goto err_debugfs_exit;
      
              err = nsim_bpf_dev_init(nsim_dev);
              if (err)
                      goto err_health_exit;
      
              err = nsim_dev_port_add_all(nsim_dev, nsim_bus_dev->port_count);
              if (err)
                      goto err_bpf_dev_exit;
      
              devlink_params_publish(devlink);
              devlink_reload_enable(devlink);
              return 0;
      
      err_bpf_dev_exit:
              nsim_bpf_dev_exit(nsim_dev);
      err_health_exit:
              nsim_dev_health_exit(nsim_dev);
      err_debugfs_exit:
              nsim_dev_debugfs_exit(nsim_dev);
      err_traps_exit:
              nsim_dev_traps_exit(devlink);
      err_dummy_region_exit:
              nsim_dev_dummy_region_exit(nsim_dev);
      err_params_unregister:
              devlink_params_unregister(devlink, nsim_devlink_params,
                                        ARRAY_SIZE(nsim_devlink_params));
      err_dl_unregister:
              devlink_unregister(devlink);
      err_fib_destroy:
              nsim_fib_destroy(devlink, nsim_dev->fib_data);
      err_resources_unregister:
              devlink_resources_unregister(devlink, NULL);
      err_devlink_free:
              devlink_free(devlink);
              return err;
      }
      
      static void nsim_dev_reload_destroy(struct nsim_dev *nsim_dev)
      {
              struct devlink *devlink = priv_to_devlink(nsim_dev);
      
              if (devlink_is_reload_failed(devlink))
                      return;
              debugfs_remove(nsim_dev->take_snapshot);
              nsim_dev_port_del_all(nsim_dev);
              nsim_dev_health_exit(nsim_dev);
              nsim_dev_traps_exit(devlink);
              nsim_dev_dummy_region_exit(nsim_dev);
              mutex_destroy(&nsim_dev->port_list_lock);
              nsim_fib_destroy(devlink, nsim_dev->fib_data);
      }
      
      void nsim_dev_remove(struct nsim_bus_dev *nsim_bus_dev)
      {
              struct nsim_dev *nsim_dev = dev_get_drvdata(&nsim_bus_dev->dev);
              struct devlink *devlink = priv_to_devlink(nsim_dev);
      
              devlink_reload_disable(devlink);
      
              nsim_dev_reload_destroy(nsim_dev);
      
              nsim_bpf_dev_exit(nsim_dev);
              nsim_dev_debugfs_exit(nsim_dev);
              devlink_params_unregister(devlink, nsim_devlink_params,
                                        ARRAY_SIZE(nsim_devlink_params));
              devlink_unregister(devlink);
              devlink_resources_unregister(devlink, NULL);
              devlink_free(devlink);
      }
      
      static struct nsim_dev_port *
      __nsim_dev_port_lookup(struct nsim_dev *nsim_dev, unsigned int port_index)
      {
              struct nsim_dev_port *nsim_dev_port;
      
              list_for_each_entry(nsim_dev_port, &nsim_dev->port_list, list)
                      if (nsim_dev_port->port_index == port_index)
                              return nsim_dev_port;
              return NULL;
      }
      
      int nsim_dev_port_add(struct nsim_bus_dev *nsim_bus_dev,
                            unsigned int port_index)
      {
              struct nsim_dev *nsim_dev = dev_get_drvdata(&nsim_bus_dev->dev);
              int err;
      
              mutex_lock(&nsim_dev->port_list_lock);
              if (__nsim_dev_port_lookup(nsim_dev, port_index))
                      err = -EEXIST;
              else
                      err = __nsim_dev_port_add(nsim_dev, port_index);
              mutex_unlock(&nsim_dev->port_list_lock);
              return err;
      }
      
      int nsim_dev_port_del(struct nsim_bus_dev *nsim_bus_dev,
                            unsigned int port_index)
      {
              struct nsim_dev *nsim_dev = dev_get_drvdata(&nsim_bus_dev->dev);
              struct nsim_dev_port *nsim_dev_port;
              int err = 0;
      
              mutex_lock(&nsim_dev->port_list_lock);
              nsim_dev_port = __nsim_dev_port_lookup(nsim_dev, port_index);
              if (!nsim_dev_port)
                      err = -ENOENT;
              else
                      __nsim_dev_port_del(nsim_dev_port);
              mutex_unlock(&nsim_dev->port_list_lock);
              return err;
      }
      
      int nsim_dev_init(void)
      {
              nsim_dev_ddir = debugfs_create_dir(DRV_NAME, NULL);
              return PTR_ERR_OR_ZERO(nsim_dev_ddir);
      }
      
      void nsim_dev_exit(void)
      {
              debugfs_remove_recursive(nsim_dev_ddir);
      }
      // SPDX-License-Identifier: GPL-2.0-or-later
      /*
       *        X.25 Packet Layer release 002
       *
       *        This is ALPHA test software. This code may break your machine,
       *        randomly fail to work with new releases, misbehave and/or generally
       *        screw up. It might even work.
       *
       *        This code REQUIRES 2.1.15 or higher
       *
       *        History
       *        X.25 001        Jonathan Naylor        Started coding.
       *        X.25 002        Jonathan Naylor        Centralised disconnect handling.
       *                                        New timer architecture.
       *        2000-03-11        Henner Eisen        MSG_EOR handling more POSIX compliant.
       *        2000-03-22        Daniela Squassoni Allowed disabling/enabling of
       *                                          facilities negotiation and increased
       *                                          the throughput upper limit.
       *        2000-08-27        Arnaldo C. Melo s/suser/capable/ + micro cleanups
       *        2000-09-04        Henner Eisen        Set sock->state in x25_accept().
       *                                        Fixed x25_output() related skb leakage.
       *        2000-10-02        Henner Eisen        Made x25_kick() single threaded per socket.
       *        2000-10-27        Henner Eisen    MSG_DONTWAIT for fragment allocation.
       *        2000-11-14        Henner Eisen    Closing datalink from NETDEV_GOING_DOWN
       *        2002-10-06        Arnaldo C. Melo Get rid of cli/sti, move proc stuff to
       *                                        x25_proc.c, using seq_file
       *        2005-04-02        Shaun Pereira        Selective sub address matching
       *                                        with call user data
       *        2005-04-15        Shaun Pereira        Fast select with no restriction on
       *                                        response
       */
      
      #define pr_fmt(fmt) "X25: " fmt
      
      #include <linux/module.h>
      #include <linux/capability.h>
      #include <linux/errno.h>
      #include <linux/kernel.h>
      #include <linux/sched/signal.h>
      #include <linux/timer.h>
      #include <linux/string.h>
      #include <linux/net.h>
      #include <linux/netdevice.h>
      #include <linux/if_arp.h>
      #include <linux/skbuff.h>
      #include <linux/slab.h>
      #include <net/sock.h>
      #include <net/tcp_states.h>
      #include <linux/uaccess.h>
      #include <linux/fcntl.h>
      #include <linux/termios.h>        /* For TIOCINQ/OUTQ */
      #include <linux/notifier.h>
      #include <linux/init.h>
      #include <linux/compat.h>
      #include <linux/ctype.h>
      
      #include <net/x25.h>
      #include <net/compat.h>
      
      int sysctl_x25_restart_request_timeout = X25_DEFAULT_T20;
      int sysctl_x25_call_request_timeout    = X25_DEFAULT_T21;
      int sysctl_x25_reset_request_timeout   = X25_DEFAULT_T22;
      int sysctl_x25_clear_request_timeout   = X25_DEFAULT_T23;
      int sysctl_x25_ack_holdback_timeout    = X25_DEFAULT_T2;
      int sysctl_x25_forward                 = 0;
      
      HLIST_HEAD(x25_list);
      DEFINE_RWLOCK(x25_list_lock);
      
      static const struct proto_ops x25_proto_ops;
      
      static const struct x25_address null_x25_address = {"               "};
      
      #ifdef CONFIG_COMPAT
      struct compat_x25_subscrip_struct {
              char device[200-sizeof(compat_ulong_t)];
              compat_ulong_t global_facil_mask;
              compat_uint_t extended;
      };
      #endif
      
      
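/*
 *	Parse the address block at the start of an incoming packet and
 *	return its length, 0 if the block is absent, or -1 if the packet
 *	is too short to hold the addresses it claims to carry.
 */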
      int x25_parse_address_block(struct sk_buff *skb,
                      struct x25_address *called_addr,
                      struct x25_address *calling_addr)
      {
              unsigned char len;
              int needed;
              int rc;
      
              if (!pskb_may_pull(skb, 1)) {
                      /* packet has no address block */
                      rc = 0;
                      goto empty;
              }
      
              len = *skb->data;
              needed = 1 + ((len >> 4) + (len & 0x0f) + 1) / 2;
      
              if (!pskb_may_pull(skb, needed)) {
		/* packet is too short to hold the addresses it
		 * claims to hold
		 */
                      rc = -1;
                      goto empty;
              }
      
              return x25_addr_ntoa(skb->data, called_addr, calling_addr);
      
      empty:
              *called_addr->x25_addr = 0;
              *calling_addr->x25_addr = 0;
      
              return rc;
      }
      
      
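/*
 *	Decode the packed BCD called/calling addresses that follow the
 *	length octet into ASCII digit strings and return the total length
 *	of the address block.
 */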
      int x25_addr_ntoa(unsigned char *p, struct x25_address *called_addr,
                        struct x25_address *calling_addr)
      {
              unsigned int called_len, calling_len;
              char *called, *calling;
              unsigned int i;
      
              called_len  = (*p >> 0) & 0x0F;
              calling_len = (*p >> 4) & 0x0F;
      
              called  = called_addr->x25_addr;
              calling = calling_addr->x25_addr;
              p++;
      
              for (i = 0; i < (called_len + calling_len); i++) {
                      if (i < called_len) {
                              if (i % 2 != 0) {
                                      *called++ = ((*p >> 0) & 0x0F) + '0';
                                      p++;
                              } else {
                                      *called++ = ((*p >> 4) & 0x0F) + '0';
                              }
                      } else {
                              if (i % 2 != 0) {
                                      *calling++ = ((*p >> 0) & 0x0F) + '0';
                                      p++;
                              } else {
                                      *calling++ = ((*p >> 4) & 0x0F) + '0';
                              }
                      }
              }
      
              *called = *calling = '\0';
      
              return 1 + (called_len + calling_len + 1) / 2;
      }
      
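/*
 *	Encode the called/calling addresses as packed BCD, preceded by a
 *	length octet holding the two digit counts, and return the length
 *	of the resulting address block.
 */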
      int x25_addr_aton(unsigned char *p, struct x25_address *called_addr,
                        struct x25_address *calling_addr)
      {
              unsigned int called_len, calling_len;
              char *called, *calling;
              int i;
      
              called  = called_addr->x25_addr;
              calling = calling_addr->x25_addr;
      
              called_len  = strlen(called);
              calling_len = strlen(calling);
      
              *p++ = (calling_len << 4) | (called_len << 0);
      
              for (i = 0; i < (called_len + calling_len); i++) {
                      if (i < called_len) {
                              if (i % 2 != 0) {
                                      *p |= (*called++ - '0') << 0;
                                      p++;
                              } else {
                                      *p = 0x00;
                                      *p |= (*called++ - '0') << 4;
                              }
                      } else {
                              if (i % 2 != 0) {
                                      *p |= (*calling++ - '0') << 0;
                                      p++;
                              } else {
                                      *p = 0x00;
                                      *p |= (*calling++ - '0') << 4;
                              }
                      }
              }
      
              return 1 + (called_len + calling_len + 1) / 2;
      }
      
      /*
       *        Socket removal during an interrupt is now safe.
       */
      static void x25_remove_socket(struct sock *sk)
      {
              write_lock_bh(&x25_list_lock);
              sk_del_node_init(sk);
              write_unlock_bh(&x25_list_lock);
      }
      
      /*
       *        Kill all bound sockets on a dropped device.
       */
      static void x25_kill_by_device(struct net_device *dev)
      {
              struct sock *s;
      
              write_lock_bh(&x25_list_lock);
      
              sk_for_each(s, &x25_list)
                      if (x25_sk(s)->neighbour && x25_sk(s)->neighbour->dev == dev)
                              x25_disconnect(s, ENETUNREACH, 0, 0);
      
              write_unlock_bh(&x25_list_lock);
      }
      
      /*
       *        Handle device status changes.
       */
      static int x25_device_event(struct notifier_block *this, unsigned long event,
                                  void *ptr)
      {
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct x25_neigh *nb;

	if (!net_eq(dev_net(dev), &init_net))
                      return NOTIFY_DONE;
      
              if (dev->type == ARPHRD_X25
      #if IS_ENABLED(CONFIG_LLC)
               || dev->type == ARPHRD_ETHER
      #endif
               ) {
                      switch (event) {
                      case NETDEV_UP:
                              x25_link_device_up(dev);
                              break;
                      case NETDEV_GOING_DOWN:
                              nb = x25_get_neigh(dev);
                              if (nb) {
                                      x25_terminate_link(nb);
                                      x25_neigh_put(nb);
                              }
                              break;
                      case NETDEV_DOWN:
                              x25_kill_by_device(dev);
                              x25_route_device_down(dev);
                              x25_link_device_down(dev);
                              break;
                      }
              }
      
              return NOTIFY_DONE;
      }
      
      /*
       *        Add a socket to the bound sockets list.
       */
      static void x25_insert_socket(struct sock *sk)
      {
              write_lock_bh(&x25_list_lock);
              sk_add_node(sk, &x25_list);
              write_unlock_bh(&x25_list_lock);
      }
      
      /*
       *        Find a socket that wants to accept the Call Request we just
       *        received. Check the full list for an address/cud match.
 *	If no cud matches, return the next best thing: an address match.
 *	Note: if a listening socket has a cud set, it must only get calls
 *	with a matching cud.
       */
      static struct sock *x25_find_listener(struct x25_address *addr,
                                              struct sk_buff *skb)
      {
              struct sock *s;
              struct sock *next_best;
      
              read_lock_bh(&x25_list_lock);
              next_best = NULL;
      
              sk_for_each(s, &x25_list)
                      if ((!strcmp(addr->x25_addr,
                              x25_sk(s)->source_addr.x25_addr) ||
                                      !strcmp(x25_sk(s)->source_addr.x25_addr,
                                              null_x25_address.x25_addr)) &&
                                              s->sk_state == TCP_LISTEN) {
                              /*
                               * Found a listening socket, now check the incoming
                               * call user data vs this sockets call user data
                               */
                              if (x25_sk(s)->cudmatchlength > 0 &&
                                      skb->len >= x25_sk(s)->cudmatchlength) {
				if (memcmp(x25_sk(s)->calluserdata.cuddata,
					   skb->data,
					   x25_sk(s)->cudmatchlength) == 0) {
					sock_hold(s);
					goto found;
				}
                              } else
                                      next_best = s;
                      }
              if (next_best) {
                      s = next_best;
                      sock_hold(s);
                      goto found;
              }
              s = NULL;
      found:
              read_unlock_bh(&x25_list_lock);
              return s;
      }
      
      /*
       *        Find a connected X.25 socket given my LCI and neighbour.
       */
      static struct sock *__x25_find_socket(unsigned int lci, struct x25_neigh *nb)
      {
              struct sock *s;
      
              sk_for_each(s, &x25_list)
                      if (x25_sk(s)->lci == lci && x25_sk(s)->neighbour == nb) {
                              sock_hold(s);
                              goto found;
                      }
              s = NULL;
      found:
              return s;
      }
      
      struct sock *x25_find_socket(unsigned int lci, struct x25_neigh *nb)
      {
              struct sock *s;
      
              read_lock_bh(&x25_list_lock);
              s = __x25_find_socket(lci, nb);
              read_unlock_bh(&x25_list_lock);
              return s;
      }
      
      /*
       *        Find a unique LCI for a given device.
       */
      static unsigned int x25_new_lci(struct x25_neigh *nb)
      {
              unsigned int lci = 1;
              struct sock *sk;
      
              while ((sk = x25_find_socket(lci, nb)) != NULL) {
                      sock_put(sk);
                      if (++lci == 4096) {
                              lci = 0;
                              break;
                      }
                      cond_resched();
              }
      
              return lci;
      }
      
      /*
       *        Deferred destroy.
       */
      static void __x25_destroy_socket(struct sock *);
      
      /*
       *        handler for deferred kills.
       */
      static void x25_destroy_timer(struct timer_list *t)
      {
              struct sock *sk = from_timer(sk, t, sk_timer);
      
              x25_destroy_socket_from_timer(sk);
      }
      
      /*
       *        This is called from user mode and the timers. Thus it protects itself
       *        against interrupt users but doesn't worry about being called during
       *        work. Once it is removed from the queue no interrupt or bottom half
       *        will touch it and we are (fairly 8-) ) safe.
 *	The timer path reaches it via x25_destroy_socket_from_timer().
       */
      static void __x25_destroy_socket(struct sock *sk)
      {
              struct sk_buff *skb;
      
              x25_stop_heartbeat(sk);
              x25_stop_timer(sk);
      
              x25_remove_socket(sk);
              x25_clear_queues(sk);                /* Flush the queues */
      
              while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                      if (skb->sk != sk) {                /* A pending connection */
                              /*
                               * Queue the unaccepted socket for death
                               */
                              skb->sk->sk_state = TCP_LISTEN;
                              sock_set_flag(skb->sk, SOCK_DEAD);
                              x25_start_heartbeat(skb->sk);
                              x25_sk(skb->sk)->state = X25_STATE_0;
                      }
      
                      kfree_skb(skb);
              }
      
              if (sk_has_allocations(sk)) {
                      /* Defer: outstanding buffers */
                      sk->sk_timer.expires  = jiffies + 10 * HZ;
                      sk->sk_timer.function = x25_destroy_timer;
                      add_timer(&sk->sk_timer);
              } else {
                      /* drop last reference so sock_put will free */
                      __sock_put(sk);
              }
      }
      
      void x25_destroy_socket_from_timer(struct sock *sk)
      {
              sock_hold(sk);
              bh_lock_sock(sk);
              __x25_destroy_socket(sk);
              bh_unlock_sock(sk);
              sock_put(sk);
      }
      
      /*
       *        Handling for system calls applied via the various interfaces to a
       *        X.25 socket object.
       */
      
      static int x25_setsockopt(struct socket *sock, int level, int optname,
                                char __user *optval, unsigned int optlen)
      {
              int opt;
              struct sock *sk = sock->sk;
              int rc = -ENOPROTOOPT;
      
              if (level != SOL_X25 || optname != X25_QBITINCL)
                      goto out;
      
              rc = -EINVAL;
              if (optlen < sizeof(int))
                      goto out;
      
              rc = -EFAULT;
              if (get_user(opt, (int __user *)optval))
                      goto out;
      
              if (opt)
                      set_bit(X25_Q_BIT_FLAG, &x25_sk(sk)->flags);
              else
                      clear_bit(X25_Q_BIT_FLAG, &x25_sk(sk)->flags);
              rc = 0;
      out:
              return rc;
      }
      
      static int x25_getsockopt(struct socket *sock, int level, int optname,
                                char __user *optval, int __user *optlen)
      {