/*
       * mm/rmap.c - physical to virtual reverse mappings
       *
       * Copyright 2001, Rik van Riel <riel@conectiva.com.br>
       * Released under the General Public License (GPL).
       *
       * Simple, low overhead reverse mapping scheme.
       * Please try to keep this thing as modular as possible.
       *
       * Provides methods for unmapping each kind of mapped page:
       * the anon methods track anonymous pages, and
       * the file methods track pages belonging to an inode.
       *
       * Original design by Rik van Riel <riel@conectiva.com.br> 2001
       * File methods by Dave McCracken <dmccr@us.ibm.com> 2003, 2004
       * Anonymous methods by Andrea Arcangeli <andrea@suse.de> 2004
       * Contributions by Hugh Dickins 2003, 2004
       */
      
      /*
       * Lock ordering in mm:
       *
       * inode->i_mutex        (while writing or truncating, not reading or faulting)
       *   mm->mmap_lock
 *     page->flags PG_locked (lock_page)   * (see hugetlbfs below)
       *       hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share)
       *         mapping->i_mmap_rwsem
       *           hugetlb_fault_mutex (hugetlbfs specific page fault mutex)
       *           anon_vma->rwsem
       *             mm->page_table_lock or pte_lock
       *               swap_lock (in swap_duplicate, swap_info_get)
       *                 mmlist_lock (in mmput, drain_mmlist and others)
       *                 mapping->private_lock (in __set_page_dirty_buffers)
       *                   lock_page_memcg move_lock (in __set_page_dirty_buffers)
       *                     i_pages lock (widely used)
       *                       lruvec->lru_lock (in lock_page_lruvec_irq)
       *                 inode->i_lock (in set_page_dirty's __mark_inode_dirty)
       *                 bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty)
       *                   sb_lock (within inode_lock in fs/fs-writeback.c)
       *                   i_pages lock (widely used, in set_page_dirty,
       *                             in arch-dependent flush_dcache_mmap_lock,
       *                             within bdi.wb->list_lock in __sync_single_inode)
       *
       * anon_vma->rwsem,mapping->i_mutex      (memory_failure, collect_procs_anon)
       *   ->tasklist_lock
       *     pte map lock
       *
       * * hugetlbfs PageHuge() pages take locks in this order:
       *         mapping->i_mmap_rwsem
       *           hugetlb_fault_mutex (hugetlbfs specific page fault mutex)
       *             page->flags PG_locked (lock_page)
       */
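
/*
 * Illustrative sketch only (not a real caller): the rmap side of this file
 * nests the locks above roughly as
 *
 *	anon_vma = page_lock_anon_vma_read(page);	// anon_vma->rwsem (read)
 *	while (page_vma_mapped_walk(&pvmw)) {		// takes the pte/pmd lock
 *		...					// inspect or clear the pte
 *	}
 *	page_unlock_anon_vma_read(anon_vma);
 *
 * The fault side instead enters with mmap_lock held and takes lock_page()
 * before anon_vma->rwsem, matching the ordering listed above.
 */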
      
      #include <linux/mm.h>
      #include <linux/sched/mm.h>
      #include <linux/sched/task.h>
      #include <linux/pagemap.h>
      #include <linux/swap.h>
      #include <linux/swapops.h>
      #include <linux/slab.h>
      #include <linux/init.h>
      #include <linux/ksm.h>
      #include <linux/rmap.h>
      #include <linux/rcupdate.h>
      #include <linux/export.h>
      #include <linux/memcontrol.h>
      #include <linux/mmu_notifier.h>
      #include <linux/migrate.h>
      #include <linux/hugetlb.h>
      #include <linux/huge_mm.h>
      #include <linux/backing-dev.h>
      #include <linux/page_idle.h>
      #include <linux/memremap.h>
      #include <linux/userfaultfd_k.h>
      
      #include <asm/tlbflush.h>
      
      #include <trace/events/tlb.h>
      
      #include "internal.h"
      
      static struct kmem_cache *anon_vma_cachep;
      static struct kmem_cache *anon_vma_chain_cachep;
      
      static inline struct anon_vma *anon_vma_alloc(void)
      {
              struct anon_vma *anon_vma;
      
              anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
              if (anon_vma) {
                      atomic_set(&anon_vma->refcount, 1);
                      anon_vma->degree = 1;        /* Reference for first vma */
                      anon_vma->parent = anon_vma;
                      /*
                       * Initialise the anon_vma root to point to itself. If called
 * from fork, the root will be reset to the parent's anon_vma.
                       */
                      anon_vma->root = anon_vma;
              }
      
              return anon_vma;
      }
      
      static inline void anon_vma_free(struct anon_vma *anon_vma)
      {
              VM_BUG_ON(atomic_read(&anon_vma->refcount));
      
              /*
               * Synchronize against page_lock_anon_vma_read() such that
               * we can safely hold the lock without the anon_vma getting
               * freed.
               *
               * Relies on the full mb implied by the atomic_dec_and_test() from
               * put_anon_vma() against the acquire barrier implied by
               * down_read_trylock() from page_lock_anon_vma_read(). This orders:
               *
               * page_lock_anon_vma_read()        VS        put_anon_vma()
               *   down_read_trylock()                  atomic_dec_and_test()
               *   LOCK                                  MB
               *   atomic_read()                          rwsem_is_locked()
               *
               * LOCK should suffice since the actual taking of the lock must
               * happen _before_ what follows.
               */
              might_sleep();
              if (rwsem_is_locked(&anon_vma->root->rwsem)) {
                      anon_vma_lock_write(anon_vma);
                      anon_vma_unlock_write(anon_vma);
              }
      
              kmem_cache_free(anon_vma_cachep, anon_vma);
      }
      
      static inline struct anon_vma_chain *anon_vma_chain_alloc(gfp_t gfp)
      {
              return kmem_cache_alloc(anon_vma_chain_cachep, gfp);
      }
      
      static void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
      {
              kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain);
      }
      
      static void anon_vma_chain_link(struct vm_area_struct *vma,
                                      struct anon_vma_chain *avc,
                                      struct anon_vma *anon_vma)
      {
              avc->vma = vma;
              avc->anon_vma = anon_vma;
              list_add(&avc->same_vma, &vma->anon_vma_chain);
              anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
      }
      
      /**
       * __anon_vma_prepare - attach an anon_vma to a memory region
       * @vma: the memory region in question
       *
       * This makes sure the memory mapping described by 'vma' has
       * an 'anon_vma' attached to it, so that we can associate the
       * anonymous pages mapped into it with that anon_vma.
       *
       * The common case will be that we already have one, which
       * is handled inline by anon_vma_prepare(). But if
       * not we either need to find an adjacent mapping that we
       * can re-use the anon_vma from (very common when the only
       * reason for splitting a vma has been mprotect()), or we
       * allocate a new one.
       *
       * Anon-vma allocations are very subtle, because we may have
       * optimistically looked up an anon_vma in page_lock_anon_vma_read()
 * and that may actually touch the rwsem even in the newly
       * allocated vma (it depends on RCU to make sure that the
       * anon_vma isn't actually destroyed).
       *
       * As a result, we need to do proper anon_vma locking even
       * for the new allocation. At the same time, we do not want
       * to do any locking for the common case of already having
       * an anon_vma.
       *
       * This must be called with the mmap_lock held for reading.
       */
      int __anon_vma_prepare(struct vm_area_struct *vma)
      {
              struct mm_struct *mm = vma->vm_mm;
              struct anon_vma *anon_vma, *allocated;
              struct anon_vma_chain *avc;
      
              might_sleep();
      
              avc = anon_vma_chain_alloc(GFP_KERNEL);
              if (!avc)
                      goto out_enomem;
      
              anon_vma = find_mergeable_anon_vma(vma);
              allocated = NULL;
              if (!anon_vma) {
                      anon_vma = anon_vma_alloc();
                      if (unlikely(!anon_vma))
                              goto out_enomem_free_avc;
                      allocated = anon_vma;
              }
      
              anon_vma_lock_write(anon_vma);
              /* page_table_lock to protect against threads */
              spin_lock(&mm->page_table_lock);
              if (likely(!vma->anon_vma)) {
                      vma->anon_vma = anon_vma;
                      anon_vma_chain_link(vma, avc, anon_vma);
                      /* vma reference or self-parent link for new root */
                      anon_vma->degree++;
                      allocated = NULL;
                      avc = NULL;
              }
              spin_unlock(&mm->page_table_lock);
              anon_vma_unlock_write(anon_vma);
      
              if (unlikely(allocated))
                      put_anon_vma(allocated);
              if (unlikely(avc))
                      anon_vma_chain_free(avc);
      
              return 0;
      
       out_enomem_free_avc:
              anon_vma_chain_free(avc);
       out_enomem:
              return -ENOMEM;
      }
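
/*
 * Illustrative sketch only: a fault handler typically calls the inline
 * wrapper anon_vma_prepare() before installing a new anonymous page,
 * roughly:
 *
 *	if (unlikely(anon_vma_prepare(vma)))
 *		return VM_FAULT_OOM;
 *	page = alloc_zeroed_user_highpage_movable(vma, address);
 *	...
 *	page_add_new_anon_rmap(page, vma, address, false);
 *
 * The wrapper only drops into __anon_vma_prepare() when vma->anon_vma is
 * still NULL, keeping the common case free of locking.
 */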
      
      /*
       * This is a useful helper function for locking the anon_vma root as
       * we traverse the vma->anon_vma_chain, looping over anon_vma's that
       * have the same vma.
       *
       * Such anon_vma's should have the same root, so you'd expect to see
 * just a single down_write of the root's rwsem for the whole traversal.
       */
      static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct anon_vma *anon_vma)
      {
              struct anon_vma *new_root = anon_vma->root;
              if (new_root != root) {
                      if (WARN_ON_ONCE(root))
                              up_write(&root->rwsem);
                      root = new_root;
                      down_write(&root->rwsem);
              }
              return root;
      }
      
      static inline void unlock_anon_vma_root(struct anon_vma *root)
      {
              if (root)
                      up_write(&root->rwsem);
      }
      
      /*
       * Attach the anon_vmas from src to dst.
       * Returns 0 on success, -ENOMEM on failure.
       *
 * anon_vma_clone() is called by __vma_adjust(), __split_vma(), copy_vma() and
 * anon_vma_fork(). The first three want an exact copy of src, while the last
 * one, anon_vma_fork(), may try to reuse an existing anon_vma to prevent
 * endless growth of anon_vma. Since dst->anon_vma is set to NULL before the
 * call, we can identify this case by checking (!dst->anon_vma && src->anon_vma).
       *
       * If (!dst->anon_vma && src->anon_vma) is true, this function tries to find
       * and reuse existing anon_vma which has no vmas and only one child anon_vma.
 * This prevents the anon_vma hierarchy from degrading into an endless linear
 * chain when a task forks repeatedly. On the other hand, an anon_vma with more
 * than one child is not reused even if it has no live vma, so the rmap
 * walker has a good chance of avoiding a scan of the whole hierarchy when it
 * searches for where a page is mapped.
       */
      int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
      {
              struct anon_vma_chain *avc, *pavc;
              struct anon_vma *root = NULL;
      
              list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
                      struct anon_vma *anon_vma;
      
                      avc = anon_vma_chain_alloc(GFP_NOWAIT | __GFP_NOWARN);
                      if (unlikely(!avc)) {
                              unlock_anon_vma_root(root);
                              root = NULL;
                              avc = anon_vma_chain_alloc(GFP_KERNEL);
                              if (!avc)
                                      goto enomem_failure;
                      }
                      anon_vma = pavc->anon_vma;
                      root = lock_anon_vma_root(root, anon_vma);
                      anon_vma_chain_link(dst, avc, anon_vma);
      
                      /*
                 * Reuse the existing anon_vma if its degree is lower than two,
                 * which means it has no vma and only one anon_vma child.
                 *
                 * Do not choose the parent anon_vma, otherwise the first child
                 * will always reuse it. The root anon_vma is never reused:
                 * it has a self-parent reference and at least one child.
                       */
                      if (!dst->anon_vma && src->anon_vma &&
                          anon_vma != src->anon_vma && anon_vma->degree < 2)
                              dst->anon_vma = anon_vma;
              }
              if (dst->anon_vma)
                      dst->anon_vma->degree++;
              unlock_anon_vma_root(root);
              return 0;
      
       enomem_failure:
              /*
               * dst->anon_vma is dropped here otherwise its degree can be incorrectly
               * decremented in unlink_anon_vmas().
               * We can safely do this because callers of anon_vma_clone() don't care
               * about dst->anon_vma if anon_vma_clone() failed.
               */
              dst->anon_vma = NULL;
              unlink_anon_vmas(dst);
              return -ENOMEM;
      }
      
      /*
       * Attach vma to its own anon_vma, as well as to the anon_vmas that
       * the corresponding VMA in the parent process is attached to.
       * Returns 0 on success, non-zero on failure.
       */
      int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
      {
              struct anon_vma_chain *avc;
              struct anon_vma *anon_vma;
              int error;
      
              /* Don't bother if the parent process has no anon_vma here. */
              if (!pvma->anon_vma)
                      return 0;
      
              /* Drop inherited anon_vma, we'll reuse existing or allocate new. */
              vma->anon_vma = NULL;
      
              /*
               * First, attach the new VMA to the parent VMA's anon_vmas,
               * so rmap can find non-COWed pages in child processes.
               */
              error = anon_vma_clone(vma, pvma);
              if (error)
                      return error;
      
              /* An existing anon_vma has been reused, all done then. */
              if (vma->anon_vma)
                      return 0;
      
              /* Then add our own anon_vma. */
              anon_vma = anon_vma_alloc();
              if (!anon_vma)
                      goto out_error;
              avc = anon_vma_chain_alloc(GFP_KERNEL);
              if (!avc)
                      goto out_error_free_anon_vma;
      
              /*
         * The root anon_vma's rwsem is the lock actually used when we
               * lock any of the anon_vmas in this anon_vma tree.
               */
              anon_vma->root = pvma->anon_vma->root;
              anon_vma->parent = pvma->anon_vma;
              /*
               * With refcounts, an anon_vma can stay around longer than the
               * process it belongs to. The root anon_vma needs to be pinned until
               * this anon_vma is freed, because the lock lives in the root.
               */
              get_anon_vma(anon_vma->root);
              /* Mark this anon_vma as the one where our new (COWed) pages go. */
              vma->anon_vma = anon_vma;
              anon_vma_lock_write(anon_vma);
              anon_vma_chain_link(vma, avc, anon_vma);
              anon_vma->parent->degree++;
              anon_vma_unlock_write(anon_vma);
      
              return 0;
      
       out_error_free_anon_vma:
              put_anon_vma(anon_vma);
       out_error:
              unlink_anon_vmas(vma);
              return -ENOMEM;
      }
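
/*
 * Illustrative sketch of the fork-time caller (see kernel/fork.c's
 * dup_mmap(), roughly):
 *
 *	for each parent vma mpnt, with child copy tmp:
 *		if (anon_vma_fork(tmp, mpnt))
 *			goto fail_nomem_anon_vma_fork;
 *
 * so the child vma is chained to the parent's anon_vmas (for pages still
 * shared) and gets its own anon_vma for pages COWed after the fork.
 */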
      
      void unlink_anon_vmas(struct vm_area_struct *vma)
      {
              struct anon_vma_chain *avc, *next;
              struct anon_vma *root = NULL;
      
              /*
               * Unlink each anon_vma chained to the VMA.  This list is ordered
               * from newest to oldest, ensuring the root anon_vma gets freed last.
               */
              list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
                      struct anon_vma *anon_vma = avc->anon_vma;
      
                      root = lock_anon_vma_root(root, anon_vma);
                      anon_vma_interval_tree_remove(avc, &anon_vma->rb_root);
      
                      /*
                       * Leave empty anon_vmas on the list - we'll need
                       * to free them outside the lock.
                       */
                      if (RB_EMPTY_ROOT(&anon_vma->rb_root.rb_root)) {
                              anon_vma->parent->degree--;
                              continue;
                      }
      
                      list_del(&avc->same_vma);
                      anon_vma_chain_free(avc);
              }
              if (vma->anon_vma)
                      vma->anon_vma->degree--;
              unlock_anon_vma_root(root);
      
              /*
               * Iterate the list once more, it now only contains empty and unlinked
               * anon_vmas, destroy them. Could not do before due to __put_anon_vma()
               * needing to write-acquire the anon_vma->root->rwsem.
               */
              list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
                      struct anon_vma *anon_vma = avc->anon_vma;
      
                      VM_WARN_ON(anon_vma->degree);
                      put_anon_vma(anon_vma);
      
                      list_del(&avc->same_vma);
                      anon_vma_chain_free(avc);
              }
      }
      
      static void anon_vma_ctor(void *data)
      {
              struct anon_vma *anon_vma = data;
      
              init_rwsem(&anon_vma->rwsem);
              atomic_set(&anon_vma->refcount, 0);
              anon_vma->rb_root = RB_ROOT_CACHED;
      }
      
      void __init anon_vma_init(void)
      {
              anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
                              0, SLAB_TYPESAFE_BY_RCU|SLAB_PANIC|SLAB_ACCOUNT,
                              anon_vma_ctor);
              anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain,
                              SLAB_PANIC|SLAB_ACCOUNT);
      }
      
      /*
       * Getting a lock on a stable anon_vma from a page off the LRU is tricky!
       *
 * Since there is no serialization whatsoever against page_remove_rmap(),
       * the best this function can do is return a locked anon_vma that might
       * have been relevant to this page.
       *
       * The page might have been remapped to a different anon_vma or the anon_vma
       * returned may already be freed (and even reused).
       *
       * In case it was remapped to a different anon_vma, the new anon_vma will be a
       * child of the old anon_vma, and the anon_vma lifetime rules will therefore
       * ensure that any anon_vma obtained from the page will still be valid for as
       * long as we observe page_mapped() [ hence all those page_mapped() tests ].
       *
       * All users of this function must be very careful when walking the anon_vma
       * chain and verify that the page in question is indeed mapped in it
       * [ something equivalent to page_mapped_in_vma() ].
       *
       * Since anon_vma's slab is SLAB_TYPESAFE_BY_RCU and we know from
       * page_remove_rmap() that the anon_vma pointer from page->mapping is valid
       * if there is a mapcount, we can dereference the anon_vma after observing
       * those.
       */
      struct anon_vma *page_get_anon_vma(struct page *page)
      {
              struct anon_vma *anon_vma = NULL;
              unsigned long anon_mapping;
      
              rcu_read_lock();
              anon_mapping = (unsigned long)READ_ONCE(page->mapping);
              if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
                      goto out;
              if (!page_mapped(page))
                      goto out;
      
              anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
              if (!atomic_inc_not_zero(&anon_vma->refcount)) {
                      anon_vma = NULL;
                      goto out;
              }
      
              /*
               * If this page is still mapped, then its anon_vma cannot have been
               * freed.  But if it has been unmapped, we have no security against the
               * anon_vma structure being freed and reused (for another anon_vma:
               * SLAB_TYPESAFE_BY_RCU guarantees that - so the atomic_inc_not_zero()
               * above cannot corrupt).
               */
              if (!page_mapped(page)) {
                      rcu_read_unlock();
                      put_anon_vma(anon_vma);
                      return NULL;
              }
      out:
              rcu_read_unlock();
      
              return anon_vma;
      }
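
/*
 * Illustrative sketch only: callers such as page migration pin the anon_vma
 * across an operation that may sleep, then drop the reference:
 *
 *	anon_vma = page_get_anon_vma(page);
 *	if (anon_vma) {
 *		...		// walk/unmap while the anon_vma cannot go away
 *		put_anon_vma(anon_vma);
 *	}
 */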
      
      /*
       * Similar to page_get_anon_vma() except it locks the anon_vma.
       *
 * It's a little more complex as it tries to keep the fast path to a single
 * atomic op -- the trylock. If we fail the trylock, we fall back to getting a
 * reference like with page_get_anon_vma() and then block on the rwsem.
       */
      struct anon_vma *page_lock_anon_vma_read(struct page *page)
      {
              struct anon_vma *anon_vma = NULL;
              struct anon_vma *root_anon_vma;
              unsigned long anon_mapping;
      
              rcu_read_lock();
              anon_mapping = (unsigned long)READ_ONCE(page->mapping);
              if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
                      goto out;
              if (!page_mapped(page))
                      goto out;
      
              anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
              root_anon_vma = READ_ONCE(anon_vma->root);
              if (down_read_trylock(&root_anon_vma->rwsem)) {
                      /*
                       * If the page is still mapped, then this anon_vma is still
                       * its anon_vma, and holding the mutex ensures that it will
                       * not go away, see anon_vma_free().
                       */
                      if (!page_mapped(page)) {
                              up_read(&root_anon_vma->rwsem);
                              anon_vma = NULL;
                      }
                      goto out;
              }
      
        /* trylock failed, we have to sleep */
              if (!atomic_inc_not_zero(&anon_vma->refcount)) {
                      anon_vma = NULL;
                      goto out;
              }
      
              if (!page_mapped(page)) {
                      rcu_read_unlock();
                      put_anon_vma(anon_vma);
                      return NULL;
              }
      
        /* we pinned the anon_vma, it's safe to sleep */
              rcu_read_unlock();
              anon_vma_lock_read(anon_vma);
      
              if (atomic_dec_and_test(&anon_vma->refcount)) {
                      /*
                       * Oops, we held the last refcount, release the lock
                       * and bail -- can't simply use put_anon_vma() because
                       * we'll deadlock on the anon_vma_lock_write() recursion.
                       */
                      anon_vma_unlock_read(anon_vma);
                      __put_anon_vma(anon_vma);
                      anon_vma = NULL;
              }
      
              return anon_vma;
      
      out:
              rcu_read_unlock();
              return anon_vma;
      }
      
      void page_unlock_anon_vma_read(struct anon_vma *anon_vma)
      {
              anon_vma_unlock_read(anon_vma);
      }
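
/*
 * Illustrative sketch only: rmap_walk_anon() pairs these roughly as
 *
 *	anon_vma = page_lock_anon_vma_read(page);
 *	if (!anon_vma)
 *		return;		// page no longer (or never was) anon-mapped
 *	... iterate the vmas via anon_vma_interval_tree_foreach() ...
 *	page_unlock_anon_vma_read(anon_vma);
 */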
      
      #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
      /*
 * Flush TLB entries for recently unmapped pages from remote CPUs. If a PTE
 * was dirty when it was unmapped, it is important that it is flushed before
 * any IO is initiated on the page, to prevent lost writes. Similarly, it must
 * be flushed before the page is freed, to prevent data leakage.
       */
      void try_to_unmap_flush(void)
      {
              struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
      
              if (!tlb_ubc->flush_required)
                      return;
      
              arch_tlbbatch_flush(&tlb_ubc->arch);
              tlb_ubc->flush_required = false;
              tlb_ubc->writable = false;
      }
      
      /* Flush iff there are potentially writable TLB entries that can race with IO */
      void try_to_unmap_flush_dirty(void)
      {
              struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
      
              if (tlb_ubc->writable)
                      try_to_unmap_flush();
      }
      
      static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
      {
              struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
      
              arch_tlbbatch_add_mm(&tlb_ubc->arch, mm);
              tlb_ubc->flush_required = true;
      
              /*
               * Ensure compiler does not re-order the setting of tlb_flush_batched
               * before the PTE is cleared.
               */
              barrier();
              mm->tlb_flush_batched = true;
      
              /*
               * If the PTE was dirty then it's best to assume it's writable. The
               * caller must use try_to_unmap_flush_dirty() or try_to_unmap_flush()
               * before the page is queued for IO.
               */
              if (writable)
                      tlb_ubc->writable = true;
      }
      
      /*
       * Returns true if the TLB flush should be deferred to the end of a batch of
       * unmap operations to reduce IPIs.
       */
      static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
      {
              bool should_defer = false;
      
              if (!(flags & TTU_BATCH_FLUSH))
                      return false;
      
        /* If remote CPUs need to be flushed then defer the flush to batch it */
              if (cpumask_any_but(mm_cpumask(mm), get_cpu()) < nr_cpu_ids)
                      should_defer = true;
              put_cpu();
      
              return should_defer;
      }
      
      /*
       * Reclaim unmaps pages under the PTL but do not flush the TLB prior to
       * releasing the PTL if TLB flushes are batched. It's possible for a parallel
       * operation such as mprotect or munmap to race between reclaim unmapping
       * the page and flushing the page. If this race occurs, it potentially allows
       * access to data via a stale TLB entry. Tracking all mm's that have TLB
       * batching in flight would be expensive during reclaim so instead track
       * whether TLB batching occurred in the past and if so then do a flush here
 * if required. This will cost one additional flush per reclaim cycle, paid
 * by the first operation at risk such as mprotect or munmap.
       *
       * This must be called under the PTL so that an access to tlb_flush_batched
       * that is potentially a "reclaim vs mprotect/munmap/etc" race will synchronise
       * via the PTL.
       */
      void flush_tlb_batched_pending(struct mm_struct *mm)
      {
              if (data_race(mm->tlb_flush_batched)) {
                      flush_tlb_mm(mm);
      
                      /*
                       * Do not allow the compiler to re-order the clearing of
                       * tlb_flush_batched before the tlb is flushed.
                       */
                      barrier();
                      mm->tlb_flush_batched = false;
              }
      }
      #else
      static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
      {
      }
      
      static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
      {
              return false;
      }
      #endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */
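
/*
 * Illustrative sketch only: the reclaim path (mm/vmscan.c) is the expected
 * user of the batching above, roughly:
 *
 *	try_to_unmap(page, flags | TTU_BATCH_FLUSH);	// may defer the IPIs
 *	...
 *	try_to_unmap_flush_dirty();	// before starting writeback on the page
 *	...
 *	try_to_unmap_flush();		// before freeing the batch of pages
 */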
      
      /*
       * At what user virtual address is page expected in vma?
       * Caller should check the page is actually part of the vma.
       */
      unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
      {
              unsigned long address;
              if (PageAnon(page)) {
                      struct anon_vma *page__anon_vma = page_anon_vma(page);
                      /*
                       * Note: swapoff's unuse_vma() is more efficient with this
                       * check, and needs it to match anon_vma when KSM is active.
                       */
                      if (!vma->anon_vma || !page__anon_vma ||
                          vma->anon_vma->root != page__anon_vma->root)
                              return -EFAULT;
              } else if (page->mapping) {
                      if (!vma->vm_file || vma->vm_file->f_mapping != page->mapping)
                              return -EFAULT;
              } else
                      return -EFAULT;
              address = __vma_address(page, vma);
              if (unlikely(address < vma->vm_start || address >= vma->vm_end))
                      return -EFAULT;
              return address;
      }
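
/*
 * Illustrative sketch only: callers must treat the return value as an
 * address-or-error, e.g.
 *
 *	unsigned long addr = page_address_in_vma(page, vma);
 *
 *	if (addr == -EFAULT)
 *		return;		// page is not mapped in this vma
 */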
      
      pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
      {
              pgd_t *pgd;
              p4d_t *p4d;
              pud_t *pud;
              pmd_t *pmd = NULL;
              pmd_t pmde;
      
              pgd = pgd_offset(mm, address);
              if (!pgd_present(*pgd))
                      goto out;
      
              p4d = p4d_offset(pgd, address);
              if (!p4d_present(*p4d))
                      goto out;
      
              pud = pud_offset(p4d, address);
              if (!pud_present(*pud))
                      goto out;
      
              pmd = pmd_offset(pud, address);
              /*
               * Some THP functions use the sequence pmdp_huge_clear_flush(), set_pmd_at()
               * without holding anon_vma lock for write.  So when looking for a
               * genuine pmde (in which to find pte), test present and !THP together.
               */
              pmde = *pmd;
              barrier();
              if (!pmd_present(pmde) || pmd_trans_huge(pmde))
                      pmd = NULL;
      out:
              return pmd;
      }
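
/*
 * Illustrative sketch only (hypothetical caller): the returned pmd is
 * normally used to map and lock the pte level, e.g.
 *
 *	pmd = mm_find_pmd(mm, address);
 *	if (!pmd)
 *		return false;
 *	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
 *	...			// examine or modify the pte
 *	pte_unmap_unlock(pte, ptl);
 */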
      
      struct page_referenced_arg {
              int mapcount;
              int referenced;
              unsigned long vm_flags;
              struct mem_cgroup *memcg;
      };
      /*
       * arg: page_referenced_arg will be passed
       */
      static bool page_referenced_one(struct page *page, struct vm_area_struct *vma,
                              unsigned long address, void *arg)
      {
              struct page_referenced_arg *pra = arg;
              struct page_vma_mapped_walk pvmw = {
                      .page = page,
                      .vma = vma,
                      .address = address,
              };
              int referenced = 0;
      
              while (page_vma_mapped_walk(&pvmw)) {
                      address = pvmw.address;
      
                      if (vma->vm_flags & VM_LOCKED) {
                              page_vma_mapped_walk_done(&pvmw);
                              pra->vm_flags |= VM_LOCKED;
                              return false; /* To break the loop */
                      }
      
                      if (pvmw.pte) {
                              if (ptep_clear_flush_young_notify(vma, address,
                                                      pvmw.pte)) {
                                      /*
                                       * Don't treat a reference through
                                       * a sequentially read mapping as such.
                                       * If the page has been used in another mapping,
                                       * we will catch it; if this other mapping is
                                       * already gone, the unmap path will have set
                                       * PG_referenced or activated the page.
                                       */
                                      if (likely(!(vma->vm_flags & VM_SEQ_READ)))
                                              referenced++;
                              }
                      } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
                              if (pmdp_clear_flush_young_notify(vma, address,
                                                      pvmw.pmd))
                                      referenced++;
                      } else {
                              /* unexpected pmd-mapped page? */
                              WARN_ON_ONCE(1);
                      }
      
                      pra->mapcount--;
              }
      
              if (referenced)
                      clear_page_idle(page);
              if (test_and_clear_page_young(page))
                      referenced++;
      
              if (referenced) {
                      pra->referenced++;
                      pra->vm_flags |= vma->vm_flags;
              }
      
              if (!pra->mapcount)
                      return false; /* To break the loop */
      
              return true;
      }
      
      static bool invalid_page_referenced_vma(struct vm_area_struct *vma, void *arg)
      {
              struct page_referenced_arg *pra = arg;
              struct mem_cgroup *memcg = pra->memcg;
      
              if (!mm_match_cgroup(vma->vm_mm, memcg))
                      return true;
      
              return false;
      }
      
      /**
       * page_referenced - test if the page was referenced
       * @page: the page to test
       * @is_locked: caller holds lock on the page
       * @memcg: target memory cgroup
 * @vm_flags: collects the vm_flags of the vmas which actually referenced the page
       *
       * Quick test_and_clear_referenced for all mappings to a page,
       * returns the number of ptes which referenced the page.
       */
      int page_referenced(struct page *page,
                          int is_locked,
                          struct mem_cgroup *memcg,
                          unsigned long *vm_flags)
      {
              int we_locked = 0;
              struct page_referenced_arg pra = {
                      .mapcount = total_mapcount(page),
                      .memcg = memcg,
              };
              struct rmap_walk_control rwc = {
                      .rmap_one = page_referenced_one,
                      .arg = (void *)&pra,
                      .anon_lock = page_lock_anon_vma_read,
              };
      
              *vm_flags = 0;
              if (!pra.mapcount)
                      return 0;
      
              if (!page_rmapping(page))
                      return 0;
      
              if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
                      we_locked = trylock_page(page);
                      if (!we_locked)
                              return 1;
              }
      
              /*
         * If we are reclaiming on behalf of a cgroup, skip counting
         * references from vmas belonging to other cgroups.
               */
              if (memcg) {
                      rwc.invalid_vma = invalid_page_referenced_vma;
              }
      
              rmap_walk(page, &rwc);
              *vm_flags = pra.vm_flags;
      
              if (we_locked)
                      unlock_page(page);
      
              return pra.referenced;
      }
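
/*
 * Illustrative sketch only: reclaim (see page_check_references() in
 * mm/vmscan.c) consumes the result roughly as
 *
 *	referenced = page_referenced(page, 1, sc->target_mem_cgroup, &vm_flags);
 *	if (vm_flags & VM_LOCKED)
 *		// mlocked vma: let try_to_unmap() move it to the unevictable list
 *	else if (referenced)
 *		// recently used: consider keeping or re-activating the page
 */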
      
      static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
                                  unsigned long address, void *arg)
      {
              struct page_vma_mapped_walk pvmw = {
                      .page = page,
                      .vma = vma,
                      .address = address,
                      .flags = PVMW_SYNC,
              };
              struct mmu_notifier_range range;
              int *cleaned = arg;
      
              /*
         * We have to assume the worst case, i.e. pmd, for invalidation. Note that
         * the page cannot be freed from this function.
               */
              mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE,
                                      0, vma, vma->vm_mm, address,
                                      min(vma->vm_end, address + page_size(page)));
              mmu_notifier_invalidate_range_start(&range);
      
              while (page_vma_mapped_walk(&pvmw)) {
                      int ret = 0;
      
                      address = pvmw.address;
                      if (pvmw.pte) {
                              pte_t entry;
                              pte_t *pte = pvmw.pte;
      
                              if (!pte_dirty(*pte) && !pte_write(*pte))
                                      continue;
      
                              flush_cache_page(vma, address, pte_pfn(*pte));
                              entry = ptep_clear_flush(vma, address, pte);
                              entry = pte_wrprotect(entry);
                              entry = pte_mkclean(entry);
                              set_pte_at(vma->vm_mm, address, pte, entry);
                              ret = 1;
                      } else {
      #ifdef CONFIG_TRANSPARENT_HUGEPAGE
                              pmd_t *pmd = pvmw.pmd;
                              pmd_t entry;
      
                              if (!pmd_dirty(*pmd) && !pmd_write(*pmd))
                                      continue;
      
                              flush_cache_page(vma, address, page_to_pfn(page));
                              entry = pmdp_invalidate(vma, address, pmd);
                              entry = pmd_wrprotect(entry);
                              entry = pmd_mkclean(entry);
                              set_pmd_at(vma->vm_mm, address, pmd, entry);
                              ret = 1;
      #else
                              /* unexpected pmd-mapped page? */
                              WARN_ON_ONCE(1);
      #endif
                      }
      
                      /*
                       * No need to call mmu_notifier_invalidate_range() as we are
                       * downgrading page table protection not changing it to point
                       * to a new page.
                       *
                       * See Documentation/vm/mmu_notifier.rst
                       */
                      if (ret)
                              (*cleaned)++;
              }
      
              mmu_notifier_invalidate_range_end(&range);
      
              return true;
      }
      
      static bool invalid_mkclean_vma(struct vm_area_struct *vma, void *arg)
      {
              if (vma->vm_flags & VM_SHARED)
                      return false;
      
              return true;
      }
      
      int page_mkclean(struct page *page)
      {
              int cleaned = 0;
              struct address_space *mapping;
              struct rmap_walk_control rwc = {
                      .arg = (void *)&cleaned,
                      .rmap_one = page_mkclean_one,
                      .invalid_vma = invalid_mkclean_vma,
              };
      
              BUG_ON(!PageLocked(page));
      
              if (!page_mapped(page))
                      return 0;
      
              mapping = page_mapping(page);
              if (!mapping)
                      return 0;
      
              rmap_walk(page, &rwc);
      
              return cleaned;
      }
      EXPORT_SYMBOL_GPL(page_mkclean);
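
/*
 * Illustrative sketch only: the writeback side uses page_mkclean() to
 * transfer dirtiness from the ptes to the struct page before starting IO;
 * clear_page_dirty_for_io() does roughly
 *
 *	if (page_mkclean(page))
 *		set_page_dirty(page);	// a pte was dirty: mark the page dirty
 *	if (TestClearPageDirty(page))
 *		...			// account and proceed with writeback
 */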
      
      /**
       * page_move_anon_rmap - move a page to our anon_vma
       * @page:        the page to move to our anon_vma
       * @vma:        the vma the page belongs to
       *
       * When a page belongs exclusively to one process after a COW event,
       * that page can be moved into the anon_vma that belongs to just that
       * process, so the rmap code will not search the parent or sibling
       * processes.
       */
      void page_move_anon_rmap(struct page *page, struct vm_area_struct *vma)
      {
              struct anon_vma *anon_vma = vma->anon_vma;
      
              page = compound_head(page);
      
              VM_BUG_ON_PAGE(!PageLocked(page), page);
              VM_BUG_ON_VMA(!anon_vma, vma);
      
              anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
              /*
               * Ensure that anon_vma and the PAGE_MAPPING_ANON bit are written
               * simultaneously, so a concurrent reader (eg page_referenced()'s
               * PageAnon()) will not see one without the other.
               */
              WRITE_ONCE(page->mapping, (struct address_space *) anon_vma);
      }
      
      /**
       * __page_set_anon_rmap - set up new anonymous rmap
       * @page:        Page or Hugepage to add to rmap
       * @vma:        VM area to add page to.
       * @address:        User virtual address of the mapping        
       * @exclusive:        the page is exclusively owned by the current process
       */
      static void __page_set_anon_rmap(struct page *page,
              struct vm_area_struct *vma, unsigned long address, int exclusive)
      {
        struct anon_vma *anon_vma = vma->anon_vma;
      
              BUG_ON(!anon_vma);
      
        if (PageAnon(page))
                      return;
      
              /*
               * If the page isn't exclusively mapped into this vma,
               * we must use the _oldest_ possible anon_vma for the
               * page mapping!
               */
        if (!exclusive)
                      anon_vma = anon_vma->root;
      
              /*
               * page_idle does a lockless/optimistic rmap scan on page->mapping.
               * Make sure the compiler doesn't split the stores of anon_vma and
               * the PAGE_MAPPING_ANON type identifier, otherwise the rmap code
               * could mistake the mapping for a struct address_space and crash.
               */
        anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
        WRITE_ONCE(page->mapping, (struct address_space *) anon_vma);
        page->index = linear_page_index(vma, address);
      }
      
      /**
       * __page_check_anon_rmap - sanity check anonymous rmap addition
       * @page:        the page to add the mapping to
       * @vma:        the vm area in which the mapping is added
       * @address:        the user virtual address mapped
       */
      static void __page_check_anon_rmap(struct page *page,
              struct vm_area_struct *vma, unsigned long address)
      {
              /*
               * The page's anon-rmap details (mapping and index) are guaranteed to
               * be set up correctly at this point.
               *
               * We have exclusion against page_add_anon_rmap because the caller
               * always holds the page locked, except if called from page_dup_rmap,
               * in which case the page is already known to be setup.
               *
               * We have exclusion against page_add_new_anon_rmap because those pages
               * are initially only visible via the pagetables, and the pte is locked
               * over the call to page_add_new_anon_rmap.
               */
              VM_BUG_ON_PAGE(page_anon_vma(page)->root != vma->anon_vma->root, page);
              VM_BUG_ON_PAGE(page_to_pgoff(page) != linear_page_index(vma, address),
                             page);
      }
      
      /**
       * page_add_anon_rmap - add pte mapping to an anonymous page
       * @page:        the page to add the mapping to
       * @vma:        the vm area in which the mapping is added
       * @address:        the user virtual address mapped
       * @compound:        charge the page as compound or small page
       *
       * The caller needs to hold the pte lock, and the page must be locked in
       * the anon_vma case: to serialize mapping,index checking after setting,
       * and to ensure that PageAnon is not being upgraded racily to PageKsm
       * (but PageKsm is never downgraded to PageAnon).
       */
      void page_add_anon_rmap(struct page *page,
              struct vm_area_struct *vma, unsigned long address, bool compound)
      {
              do_page_add_anon_rmap(page, vma, address, compound ? RMAP_COMPOUND : 0);
      }
      
      /*
       * Special version of the above for do_swap_page, which often runs
       * into pages that are exclusively owned by the current process.
       * Everybody else should continue to use page_add_anon_rmap above.
       */
      void do_page_add_anon_rmap(struct page *page,
              struct vm_area_struct *vma, unsigned long address, int flags)
      {
              bool compound = flags & RMAP_COMPOUND;
              bool first;
      
              if (unlikely(PageKsm(page)))
                      lock_page_memcg(page);
              else
                      VM_BUG_ON_PAGE(!PageLocked(page), page);
      
              if (compound) {
                      atomic_t *mapcount;
                      VM_BUG_ON_PAGE(!PageLocked(page), page);
                      VM_BUG_ON_PAGE(!PageTransHuge(page), page);
                      mapcount = compound_mapcount_ptr(page);
                      first = atomic_inc_and_test(mapcount);
              } else {
                      first = atomic_inc_and_test(&page->_mapcount);
              }
      
              if (first) {
                      int nr = compound ? thp_nr_pages(page) : 1;
                      /*
                       * We use the irq-unsafe __{inc|mod}_zone_page_stat because
                       * these counters are not modified in interrupt context, and
                       * pte lock(a spinlock) is held, which implies preemption
                       * disabled.
                       */
                      if (compound)
                              __inc_lruvec_page_state(page, NR_ANON_THPS);
                      __mod_lruvec_page_state(page, NR_ANON_MAPPED, nr);
              }
      
              if (unlikely(PageKsm(page))) {
                      unlock_page_memcg(page);
                      return;
              }
      
              /* address might be in next vma when migration races vma_adjust */
              if (first)
                      __page_set_anon_rmap(page, vma, address,
                                      flags & RMAP_EXCLUSIVE);
              else
                      __page_check_anon_rmap(page, vma, address);
      }
      
      /**
       * page_add_new_anon_rmap - add pte mapping to a new anonymous page
       * @page:        the page to add the mapping to
       * @vma:        the vm area in which the mapping is added
       * @address:        the user virtual address mapped
       * @compound:        charge the page as compound or small page
       *
       * Same as page_add_anon_rmap but must only be called on *new* pages.
       * This means the inc-and-test can be bypassed.
       * Page does not have to be locked.
       */
      void page_add_new_anon_rmap(struct page *page,
              struct vm_area_struct *vma, unsigned long address, bool compound)
      {
        int nr = compound ? thp_nr_pages(page) : 1;

        VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
        __SetPageSwapBacked(page);
              if (compound) {
                      VM_BUG_ON_PAGE(!PageTransHuge(page), page);
                      /* increment count (starts at -1) */
                      atomic_set(compound_mapcount_ptr(page), 0);
                      if (hpage_pincount_available(page))
                              atomic_set(compound_pincount_ptr(page), 0);
      
                      __inc_lruvec_page_state(page, NR_ANON_THPS);
              } else {
                      /* Anon THP always mapped first with PMD */
                      VM_BUG_ON_PAGE(PageTransCompound(page), page);
                      /* increment count (starts at -1) */
                atomic_set(&page->_mapcount, 0);
              }
              __mod_lruvec_page_state(page, NR_ANON_MAPPED, nr);
              __page_set_anon_rmap(page, vma, address, 1);
      }
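
/*
 * Illustrative sketch only: the anonymous fault path is expected to call
 * this under the pte lock, just before the pte is installed, roughly:
 *
 *	page_add_new_anon_rmap(page, vma, address, false);
 *	lru_cache_add_inactive_or_unevictable(page, vma);
 *	set_pte_at(vma->vm_mm, address, pte, entry);
 */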
      
      /**
       * page_add_file_rmap - add pte mapping to a file page
       * @page: the page to add the mapping to
       * @compound: charge the page as compound or small page
       *
       * The caller needs to hold the pte lock.
       */
      void page_add_file_rmap(struct page *page, bool compound)
      {
              int i, nr = 1;
      
              VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
              lock_page_memcg(page);
              if (compound && PageTransHuge(page)) {
                      for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
                              if (atomic_inc_and_test(&page[i]._mapcount))
                                      nr++;
                      }
                      if (!atomic_inc_and_test(compound_mapcount_ptr(page)))
                              goto out;
                      if (PageSwapBacked(page))
                              __inc_node_page_state(page, NR_SHMEM_PMDMAPPED);
                      else
                              __inc_node_page_state(page, NR_FILE_PMDMAPPED);
              } else {
                      if (PageTransCompound(page) && page_mapping(page)) {
                              VM_WARN_ON_ONCE(!PageLocked(page));
      
                              SetPageDoubleMap(compound_head(page));
                              if (PageMlocked(page))
                                      clear_page_mlock(compound_head(page));
                      }
                      if (!atomic_inc_and_test(&page->_mapcount))
                              goto out;
              }
              __mod_lruvec_page_state(page, NR_FILE_MAPPED, nr);
      out:
              unlock_page_memcg(page);
      }
      
      static void page_remove_file_rmap(struct page *page, bool compound)
      {
              int i, nr = 1;
      
              VM_BUG_ON_PAGE(compound && !PageHead(page), page);
      
              /* Hugepages are not counted in NR_FILE_MAPPED for now. */
              if (unlikely(PageHuge(page))) {
                      /* hugetlb pages are always mapped with pmds */
                      atomic_dec(compound_mapcount_ptr(page));
                      return;
              }
      
              /* page still mapped by someone else? */
              if (compound && PageTransHuge(page)) {
                      for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
                              if (atomic_add_negative(-1, &page[i]._mapcount))
                                      nr++;
                      }
                      if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
                              return;
                      if (PageSwapBacked(page))
                              __dec_node_page_state(page, NR_SHMEM_PMDMAPPED);
                      else
                              __dec_node_page_state(page, NR_FILE_PMDMAPPED);
              } else {
                      if (!atomic_add_negative(-1, &page->_mapcount))
                              return;
              }
      
              /*
               * We use the irq-unsafe __{inc|mod}_lruvec_page_state because
               * these counters are not modified in interrupt context, and
               * pte lock(a spinlock) is held, which implies preemption disabled.
               */
              __mod_lruvec_page_state(page, NR_FILE_MAPPED, -nr);
      
              if (unlikely(PageMlocked(page)))
                      clear_page_mlock(page);
      }
      
      static void page_remove_anon_compound_rmap(struct page *page)
      {
              int i, nr;
      
              if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
                      return;
      
              /* Hugepages are not counted in NR_ANON_PAGES for now. */
              if (unlikely(PageHuge(page)))
                      return;
      
              if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
                      return;
      
              __dec_lruvec_page_state(page, NR_ANON_THPS);
      
              if (TestClearPageDoubleMap(page)) {
                      /*
                       * Subpages can be mapped with PTEs too. Check how many of
                       * them are still mapped.
                       */
                      for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
                              if (atomic_add_negative(-1, &page[i]._mapcount))
                                      nr++;
                      }
      
                      /*
                       * Queue the page for deferred split if at least one small
                       * page of the compound page is unmapped, but at least one
                       * small page is still mapped.
                       */
                      if (nr && nr < thp_nr_pages(page))
                              deferred_split_huge_page(page);
              } else {
                      nr = thp_nr_pages(page);
              }
      
              if (unlikely(PageMlocked(page)))
                      clear_page_mlock(page);
      
              if (nr)
                      __mod_lruvec_page_state(page, NR_ANON_MAPPED, -nr);
      }
      
      /**
       * page_remove_rmap - take down pte mapping from a page
       * @page:        page to remove mapping from
       * @compound:        uncharge the page as compound or small page
       *
       * The caller needs to hold the pte lock.
       */
      void page_remove_rmap(struct page *page, bool compound)
      {
        lock_page_memcg(page);

        if (!PageAnon(page)) {
                      page_remove_file_rmap(page, compound);
                      goto out;
              }
      
        if (compound) {
                      page_remove_anon_compound_rmap(page);
                      goto out;
              }
      
              /* page still mapped by someone else? */
        if (!atomic_add_negative(-1, &page->_mapcount))
                      goto out;
      
              /*
               * We use the irq-unsafe __{inc|mod}_zone_page_stat because
               * these counters are not modified in interrupt context, and
               * pte lock(a spinlock) is held, which implies preemption disabled.
               */
              __dec_lruvec_page_state(page, NR_ANON_MAPPED);
      
              if (unlikely(PageMlocked(page)))
                      clear_page_mlock(page);
      
              if (PageTransCompound(page))
                      deferred_split_huge_page(compound_head(page));
      
              /*
               * It would be tidy to reset the PageAnon mapping here,
               * but that might overwrite a racing page_add_anon_rmap
               * which increments mapcount after us but sets mapping
               * before us: so leave the reset to free_unref_page,
               * and remember that it's only reliable while mapped.
               * Leaving it set also helps swapoff to reinstate ptes
               * faster for those pages still in swapcache.
               */
      out:
        unlock_page_memcg(page);
      }
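
/*
 * Illustrative sketch only: zap/unmap paths call this under the pte lock
 * after clearing the pte, roughly:
 *
 *	ptent = ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm);
 *	tlb_remove_tlb_entry(tlb, pte, addr);
 *	...
 *	page_remove_rmap(page, false);
 *	...			// page is freed later via the mmu_gather
 */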
      
      /*
       * @arg: enum ttu_flags will be passed to this argument
       */
      static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                           unsigned long address, void *arg)
      {
              struct mm_struct *mm = vma->vm_mm;
              struct page_vma_mapped_walk pvmw = {
                      .page = page,
                      .vma = vma,
                      .address = address,
              };
              pte_t pteval;
              struct page *subpage;
              bool ret = true;
              struct mmu_notifier_range range;
              enum ttu_flags flags = (enum ttu_flags)(long)arg;
      
              /* munlock has nothing to gain from examining un-locked vmas */
              if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
                      return true;
      
              if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) &&
                  is_zone_device_page(page) && !is_device_private_page(page))
                      return true;
      
              if (flags & TTU_SPLIT_HUGE_PMD) {
                      split_huge_pmd_address(vma, address,
                                      flags & TTU_SPLIT_FREEZE, page);
              }
      
	/*
	 * For THP, we have to assume the worst case, i.e. PMD size, for
	 * invalidation.  For hugetlb, it could be even worse if we need to
	 * do PUD-level invalidation in the case of PMD sharing.
	 *
	 * Note that the page cannot be freed in this function, as the
	 * caller of try_to_unmap() must hold a reference on the page.
	 */
              mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
                                      address,
                                      min(vma->vm_end, address + page_size(page)));
              if (PageHuge(page)) {
                      /*
                       * If sharing is possible, start and end will be adjusted
                       * accordingly.
                       */
                      adjust_range_if_pmd_sharing_possible(vma, &range.start,
                                                           &range.end);
              }
              mmu_notifier_invalidate_range_start(&range);
      
              while (page_vma_mapped_walk(&pvmw)) {
      #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
                      /* PMD-mapped THP migration entry */
                      if (!pvmw.pte && (flags & TTU_MIGRATION)) {
                              VM_BUG_ON_PAGE(PageHuge(page) || !PageTransCompound(page), page);
      
                              set_pmd_migration_entry(&pvmw, page);
                              continue;
                      }
      #endif
      
                      /*
                       * If the page is mlock()d, we cannot swap it out.
                       * If it's recently referenced (perhaps page_referenced
                       * skipped over this mm) then we should reactivate it.
                       */
                      if (!(flags & TTU_IGNORE_MLOCK)) {
                              if (vma->vm_flags & VM_LOCKED) {
                                      /* PTE-mapped THP are never mlocked */
                                      if (!PageTransCompound(page)) {
                                              /*
                                               * Holding pte lock, we do *not* need
                                               * mmap_lock here
                                               */
                                              mlock_vma_page(page);
                                      }
                                      ret = false;
                                      page_vma_mapped_walk_done(&pvmw);
                                      break;
                              }
                              if (flags & TTU_MUNLOCK)
                                      continue;
                      }
      
                      /* Unexpected PMD-mapped THP? */
                      VM_BUG_ON_PAGE(!pvmw.pte, page);
      
                      subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
                      address = pvmw.address;
      
                      if (PageHuge(page) && !PageAnon(page)) {
                              /*
                               * To call huge_pmd_unshare, i_mmap_rwsem must be
                               * held in write mode.  Caller needs to explicitly
                               * do this outside rmap routines.
                               */
                              VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
                              if (huge_pmd_unshare(mm, vma, &address, pvmw.pte)) {
                                      /*
                                       * huge_pmd_unshare unmapped an entire PMD
                                       * page.  There is no way of knowing exactly
                                       * which PMDs may be cached for this mm, so
                                       * we must flush them all.  start/end were
                                       * already adjusted above to cover this range.
                                       */
                                      flush_cache_range(vma, range.start, range.end);
                                      flush_tlb_range(vma, range.start, range.end);
                                      mmu_notifier_invalidate_range(mm, range.start,
                                                                    range.end);
      
                                      /*
                                       * The ref count of the PMD page was dropped
                                       * which is part of the way map counting
                                       * is done for shared PMDs.  Return 'true'
                                       * here.  When there is no other sharing,
                                       * huge_pmd_unshare returns false and we will
                                       * unmap the actual page and drop map count
                                       * to zero.
                                       */
                                      page_vma_mapped_walk_done(&pvmw);
                                      break;
                              }
                      }
      
                      if (IS_ENABLED(CONFIG_MIGRATION) &&
                          (flags & TTU_MIGRATION) &&
                          is_zone_device_page(page)) {
                              swp_entry_t entry;
                              pte_t swp_pte;
      
                              pteval = ptep_get_and_clear(mm, pvmw.address, pvmw.pte);
      
                              /*
                               * Store the pfn of the page in a special migration
                               * pte. do_swap_page() will wait until the migration
                               * pte is removed and then restart fault handling.
                               */
                              entry = make_migration_entry(page, 0);
                              swp_pte = swp_entry_to_pte(entry);
      
                              /*
                               * pteval maps a zone device page and is therefore
                               * a swap pte.
                               */
                              if (pte_swp_soft_dirty(pteval))
                                      swp_pte = pte_swp_mksoft_dirty(swp_pte);
                              if (pte_swp_uffd_wp(pteval))
                                      swp_pte = pte_swp_mkuffd_wp(swp_pte);
                              set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
                              /*
			 * There is no need to invalidate here; it will
			 * synchronize against the special swap migration pte.
                               *
                               * The assignment to subpage above was computed from a
                               * swap PTE which results in an invalid pointer.
                               * Since only PAGE_SIZE pages can currently be
                               * migrated, just set it to page. This will need to be
                               * changed when hugepage migrations to device private
                               * memory are supported.
                               */
                              subpage = page;
                              goto discard;
                      }
      
                      /* Nuke the page table entry. */
                      flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
                      if (should_defer_flush(mm, flags)) {
                              /*
                               * We clear the PTE but do not flush so potentially
                               * a remote CPU could still be writing to the page.
                               * If the entry was previously clean then the
                               * architecture must guarantee that a clear->dirty
                               * transition on a cached TLB entry is written through
                               * and traps if the PTE is unmapped.
                               */
                              pteval = ptep_get_and_clear(mm, address, pvmw.pte);
      
                              set_tlb_ubc_flush_pending(mm, pte_dirty(pteval));
                      } else {
                              pteval = ptep_clear_flush(vma, address, pvmw.pte);
                      }
      
                      /* Move the dirty bit to the page. Now the pte is gone. */
                      if (pte_dirty(pteval))
                              set_page_dirty(page);
      
                      /* Update high watermark before we lower rss */
                      update_hiwater_rss(mm);
      
                      if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
                              pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
                              if (PageHuge(page)) {
                                      hugetlb_count_sub(compound_nr(page), mm);
                                      set_huge_swap_pte_at(mm, address,
                                                           pvmw.pte, pteval,
                                                           vma_mmu_pagesize(vma));
                              } else {
                                      dec_mm_counter(mm, mm_counter(page));
                                      set_pte_at(mm, address, pvmw.pte, pteval);
                              }
      
                      } else if (pte_unused(pteval) && !userfaultfd_armed(vma)) {
                              /*
                               * The guest indicated that the page content is of no
                               * interest anymore. Simply discard the pte, vmscan
                               * will take care of the rest.
                               * A future reference will then fault in a new zero
                               * page. When userfaultfd is active, we must not drop
                               * this page though, as its main user (postcopy
                               * migration) will not expect userfaults on already
                               * copied pages.
                               */
                              dec_mm_counter(mm, mm_counter(page));
                              /* We have to invalidate as we cleared the pte */
                              mmu_notifier_invalidate_range(mm, address,
                                                            address + PAGE_SIZE);
                      } else if (IS_ENABLED(CONFIG_MIGRATION) &&
                                      (flags & (TTU_MIGRATION|TTU_SPLIT_FREEZE))) {
                              swp_entry_t entry;
                              pte_t swp_pte;
      
                              if (arch_unmap_one(mm, vma, address, pteval) < 0) {
                                      set_pte_at(mm, address, pvmw.pte, pteval);
                                      ret = false;
                                      page_vma_mapped_walk_done(&pvmw);
                                      break;
                              }
      
                              /*
                               * Store the pfn of the page in a special migration
                               * pte. do_swap_page() will wait until the migration
                               * pte is removed and then restart fault handling.
                               */
                              entry = make_migration_entry(subpage,
                                              pte_write(pteval));
                              swp_pte = swp_entry_to_pte(entry);
                              if (pte_soft_dirty(pteval))
                                      swp_pte = pte_swp_mksoft_dirty(swp_pte);
                              if (pte_uffd_wp(pteval))
                                      swp_pte = pte_swp_mkuffd_wp(swp_pte);
                              set_pte_at(mm, address, pvmw.pte, swp_pte);
                              /*
			 * There is no need to invalidate here; it will
			 * synchronize against the special swap migration pte.
                               */
                      } else if (PageAnon(page)) {
                              swp_entry_t entry = { .val = page_private(subpage) };
                              pte_t swp_pte;
                              /*
                               * Store the swap location in the pte.
                               * See handle_pte_fault() ...
                               */
                              if (unlikely(PageSwapBacked(page) != PageSwapCache(page))) {
                                      WARN_ON_ONCE(1);
                                      ret = false;
                                      /* We have to invalidate as we cleared the pte */
                                      mmu_notifier_invalidate_range(mm, address,
                                                              address + PAGE_SIZE);
                                      page_vma_mapped_walk_done(&pvmw);
                                      break;
                              }
      
                              /* MADV_FREE page check */
                              if (!PageSwapBacked(page)) {
                                      if (!PageDirty(page)) {
                                              /* Invalidate as we cleared the pte */
                                              mmu_notifier_invalidate_range(mm,
                                                      address, address + PAGE_SIZE);
                                              dec_mm_counter(mm, MM_ANONPAGES);
                                              goto discard;
                                      }
      
                                      /*
                                       * If the page was redirtied, it cannot be
                                       * discarded. Remap the page to page table.
                                       */
                                      set_pte_at(mm, address, pvmw.pte, pteval);
                                      SetPageSwapBacked(page);
                                      ret = false;
                                      page_vma_mapped_walk_done(&pvmw);
                                      break;
                              }
      
                              if (swap_duplicate(entry) < 0) {
                                      set_pte_at(mm, address, pvmw.pte, pteval);
                                      ret = false;
                                      page_vma_mapped_walk_done(&pvmw);
                                      break;
                              }
                              if (arch_unmap_one(mm, vma, address, pteval) < 0) {
                                      set_pte_at(mm, address, pvmw.pte, pteval);
                                      ret = false;
                                      page_vma_mapped_walk_done(&pvmw);
                                      break;
                              }
                              if (list_empty(&mm->mmlist)) {
                                      spin_lock(&mmlist_lock);
                                      if (list_empty(&mm->mmlist))
                                              list_add(&mm->mmlist, &init_mm.mmlist);
                                      spin_unlock(&mmlist_lock);
                              }
                              dec_mm_counter(mm, MM_ANONPAGES);
                              inc_mm_counter(mm, MM_SWAPENTS);
                              swp_pte = swp_entry_to_pte(entry);
                              if (pte_soft_dirty(pteval))
                                      swp_pte = pte_swp_mksoft_dirty(swp_pte);
                              if (pte_uffd_wp(pteval))
                                      swp_pte = pte_swp_mkuffd_wp(swp_pte);
                              set_pte_at(mm, address, pvmw.pte, swp_pte);
                              /* Invalidate as we cleared the pte */
                              mmu_notifier_invalidate_range(mm, address,
                                                            address + PAGE_SIZE);
                      } else {
                              /*
			 * This is a locked file-backed page, so it cannot be
			 * removed from the page cache and replaced by a new
			 * page before mmu_notifier_invalidate_range_end(),
			 * and therefore no concurrent thread can update its
			 * page table to point at a new page while a device is
			 * still using this one.
                               *
                               * See Documentation/vm/mmu_notifier.rst
                               */
                              dec_mm_counter(mm, mm_counter_file(page));
                      }
      discard:
		/*
		 * There is no need to call mmu_notifier_invalidate_range()
		 * here: it has already been done above for all cases that
		 * require it to happen under the page table lock, before
		 * mmu_notifier_invalidate_range_end().
		 *
		 * See Documentation/vm/mmu_notifier.rst
		 */
                      page_remove_rmap(subpage, PageHuge(page));
                      put_page(page);
              }
      
              mmu_notifier_invalidate_range_end(&range);
      
              return ret;
      }
      
      static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg)
      {
              return vma_is_temporary_stack(vma);
      }
      
      static int page_mapcount_is_zero(struct page *page)
      {
              return !total_mapcount(page);
      }
      
      /**
       * try_to_unmap - try to remove all page table mappings to a page
       * @page: the page to get unmapped
       * @flags: action and flags
       *
       * Tries to remove all the page table entries which are mapping this
       * page, used in the pageout path.  Caller must hold the page lock.
       *
       * If unmap is successful, return true. Otherwise, false.
       */
      bool try_to_unmap(struct page *page, enum ttu_flags flags)
      {
              struct rmap_walk_control rwc = {
                      .rmap_one = try_to_unmap_one,
                      .arg = (void *)flags,
                      .done = page_mapcount_is_zero,
                      .anon_lock = page_lock_anon_vma_read,
              };
      
              /*
               * During exec, a temporary VMA is setup and later moved.
               * The VMA is moved under the anon_vma lock but not the
               * page tables leading to a race where migration cannot
               * find the migration ptes. Rather than increasing the
               * locking requirements of exec(), migration skips
               * temporary VMAs until after exec() completes.
               */
              if ((flags & (TTU_MIGRATION|TTU_SPLIT_FREEZE))
                  && !PageKsm(page) && PageAnon(page))
                      rwc.invalid_vma = invalid_migration_vma;
      
              if (flags & TTU_RMAP_LOCKED)
                      rmap_walk_locked(page, &rwc);
              else
                      rmap_walk(page, &rwc);
      
	return !page_mapcount(page);
      }
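
/*
 * Usage sketch (illustrative only, not part of this file): a reclaim-style
 * caller, which must hold the page lock, might do
 *
 *	lock_page(page);
 *	if (page_mapped(page) && try_to_unmap(page, TTU_IGNORE_MLOCK))
 *		pr_debug("all ptes mapping the page were removed\n");
 *	unlock_page(page);
 *
 * where "page" is a struct page the caller already holds a reference on.
 */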
      
      static int page_not_mapped(struct page *page)
      {
              return !page_mapped(page);
}
      
      /**
       * try_to_munlock - try to munlock a page
       * @page: the page to be munlocked
       *
       * Called from munlock code.  Checks all of the VMAs mapping the page
       * to make sure nobody else has this page mlocked. The page will be
       * returned with PG_mlocked cleared if no other vmas have it mlocked.
       */
      
      void try_to_munlock(struct page *page)
      {
              struct rmap_walk_control rwc = {
                      .rmap_one = try_to_unmap_one,
                      .arg = (void *)TTU_MUNLOCK,
                      .done = page_not_mapped,
                      .anon_lock = page_lock_anon_vma_read,
      
              };
      
              VM_BUG_ON_PAGE(!PageLocked(page) || PageLRU(page), page);
              VM_BUG_ON_PAGE(PageCompound(page) && PageDoubleMap(page), page);
      
              rmap_walk(page, &rwc);
      }
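
/*
 * Usage sketch (illustrative only): the munlock path hands in a page that
 * is locked and isolated from the LRU, as the VM_BUG_ONs above require:
 *
 *	// page is locked and off the LRU
 *	try_to_munlock(page);
 *	// PG_mlocked is now clear unless some other VM_LOCKED vma
 *	// still maps the page
 */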
      
      void __put_anon_vma(struct anon_vma *anon_vma)
      {
              struct anon_vma *root = anon_vma->root;
      
              anon_vma_free(anon_vma);
              if (root != anon_vma && atomic_dec_and_test(&root->refcount))
                      anon_vma_free(root);
      }
      
      static struct anon_vma *rmap_walk_anon_lock(struct page *page,
                                              struct rmap_walk_control *rwc)
      {
              struct anon_vma *anon_vma;
      
              if (rwc->anon_lock)
                      return rwc->anon_lock(page);
      
              /*
               * Note: remove_migration_ptes() cannot use page_lock_anon_vma_read()
               * because that depends on page_mapped(); but not all its usages
               * are holding mmap_lock. Users without mmap_lock are required to
	 * take a reference count to prevent the anon_vma from disappearing.
               */
              anon_vma = page_anon_vma(page);
              if (!anon_vma)
                      return NULL;
      
              anon_vma_lock_read(anon_vma);
              return anon_vma;
      }
      
      /*
 * rmap_walk_anon - do something to an anonymous page using the object-based
       * rmap method
       * @page: the page to be handled
       * @rwc: control variable according to each walk type
       *
       * Find all the mappings of a page using the mapping pointer and the vma chains
       * contained in the anon_vma struct it points to.
       *
       * When called from try_to_munlock(), the mmap_lock of the mm containing the vma
       * where the page was found will be held for write.  So, we won't recheck
       * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
       * LOCKED.
       */
      static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc,
                      bool locked)
      {
              struct anon_vma *anon_vma;
              pgoff_t pgoff_start, pgoff_end;
              struct anon_vma_chain *avc;
      
              if (locked) {
                      anon_vma = page_anon_vma(page);
                      /* anon_vma disappear under us? */
                      VM_BUG_ON_PAGE(!anon_vma, page);
              } else {
                      anon_vma = rmap_walk_anon_lock(page, rwc);
              }
              if (!anon_vma)
                      return;
      
              pgoff_start = page_to_pgoff(page);
              pgoff_end = pgoff_start + thp_nr_pages(page) - 1;
              anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root,
                              pgoff_start, pgoff_end) {
                      struct vm_area_struct *vma = avc->vma;
                      unsigned long address = vma_address(page, vma);
      
                      cond_resched();
      
                      if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
                              continue;
      
                      if (!rwc->rmap_one(page, vma, address, rwc->arg))
                              break;
                      if (rwc->done && rwc->done(page))
                              break;
              }
      
              if (!locked)
                      anon_vma_unlock_read(anon_vma);
      }
      
      /*
 * rmap_walk_file - do something to a file page using the object-based rmap method
       * @page: the page to be handled
       * @rwc: control variable according to each walk type
       *
       * Find all the mappings of a page using the mapping pointer and the vma chains
       * contained in the address_space struct it points to.
       *
       * When called from try_to_munlock(), the mmap_lock of the mm containing the vma
       * where the page was found will be held for write.  So, we won't recheck
       * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
       * LOCKED.
       */
      static void rmap_walk_file(struct page *page, struct rmap_walk_control *rwc,
                      bool locked)
      {
              struct address_space *mapping = page_mapping(page);
              pgoff_t pgoff_start, pgoff_end;
              struct vm_area_struct *vma;
      
              /*
               * The page lock not only makes sure that page->mapping cannot
               * suddenly be NULLified by truncation, it makes sure that the
               * structure at mapping cannot be freed and reused yet,
               * so we can safely take mapping->i_mmap_rwsem.
               */
              VM_BUG_ON_PAGE(!PageLocked(page), page);
      
              if (!mapping)
                      return;
      
              pgoff_start = page_to_pgoff(page);
              pgoff_end = pgoff_start + thp_nr_pages(page) - 1;
              if (!locked)
                      i_mmap_lock_read(mapping);
              vma_interval_tree_foreach(vma, &mapping->i_mmap,
                              pgoff_start, pgoff_end) {
                      unsigned long address = vma_address(page, vma);
      
                      cond_resched();
      
                      if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
                              continue;
      
                      if (!rwc->rmap_one(page, vma, address, rwc->arg))
                              goto done;
                      if (rwc->done && rwc->done(page))
                              goto done;
              }
      
      done:
              if (!locked)
                      i_mmap_unlock_read(mapping);
      }
      
      void rmap_walk(struct page *page, struct rmap_walk_control *rwc)
      {
              if (unlikely(PageKsm(page)))
                      rmap_walk_ksm(page, rwc);
              else if (PageAnon(page))
                      rmap_walk_anon(page, rwc, false);
              else
                      rmap_walk_file(page, rwc, false);
      }
      
      /* Like rmap_walk, but caller holds relevant rmap lock */
      void rmap_walk_locked(struct page *page, struct rmap_walk_control *rwc)
      {
              /* no ksm support for now */
              VM_BUG_ON_PAGE(PageKsm(page), page);
              if (PageAnon(page))
                      rmap_walk_anon(page, rwc, true);
              else
                      rmap_walk_file(page, rwc, true);
      }
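
/*
 * Usage sketch for the walkers above (illustrative only; every name below
 * is hypothetical).  A caller fills in an rmap_walk_control and passes a
 * locked page to rmap_walk():
 *
 *	static bool count_one_mapping(struct page *page,
 *			struct vm_area_struct *vma, unsigned long address,
 *			void *arg)
 *	{
 *		(*(unsigned long *)arg)++;
 *		return true;	// keep walking
 *	}
 *
 *	unsigned long nr = 0;
 *	struct rmap_walk_control rwc = {
 *		.rmap_one = count_one_mapping,
 *		.arg = &nr,
 *	};
 *
 *	rmap_walk(page, &rwc);	// nr now counts the vmas mapping the page
 */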
      
      #ifdef CONFIG_HUGETLB_PAGE
      /*
       * The following two functions are for anonymous (private mapped) hugepages.
       * Unlike common anonymous pages, anonymous hugepages have no accounting code
       * and no lru code, because we handle hugepages differently from common pages.
       */
      void hugepage_add_anon_rmap(struct page *page,
                                  struct vm_area_struct *vma, unsigned long address)
      {
              struct anon_vma *anon_vma = vma->anon_vma;
              int first;
      
              BUG_ON(!PageLocked(page));
              BUG_ON(!anon_vma);
              /* address might be in next vma when migration races vma_adjust */
              first = atomic_inc_and_test(compound_mapcount_ptr(page));
              if (first)
                      __page_set_anon_rmap(page, vma, address, 0);
      }
      
      void hugepage_add_new_anon_rmap(struct page *page,
                              struct vm_area_struct *vma, unsigned long address)
      {
              BUG_ON(address < vma->vm_start || address >= vma->vm_end);
              atomic_set(compound_mapcount_ptr(page), 0);
              if (hpage_pincount_available(page))
                      atomic_set(compound_pincount_ptr(page), 0);
      
              __page_set_anon_rmap(page, vma, address, 1);
      }
      #endif /* CONFIG_HUGETLB_PAGE */
      // SPDX-License-Identifier: GPL-2.0-or-later
      /* bit search implementation
       *
       * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
       * Written by David Howells (dhowells@redhat.com)
       *
       * Copyright (C) 2008 IBM Corporation
       * 'find_last_bit' is written by Rusty Russell <rusty@rustcorp.com.au>
       * (Inspired by David Howell's find_next_bit implementation)
       *
       * Rewritten by Yury Norov <yury.norov@gmail.com> to decrease
       * size and improve performance, 2015.
       */
      
      #include <linux/bitops.h>
      #include <linux/bitmap.h>
      #include <linux/export.h>
      #include <linux/math.h>
      #include <linux/minmax.h>
      #include <linux/swab.h>
      
      #if !defined(find_next_bit) || !defined(find_next_zero_bit) ||                        \
              !defined(find_next_bit_le) || !defined(find_next_zero_bit_le) ||        \
              !defined(find_next_and_bit)
/*
 * This is a common helper function for find_next_bit, find_next_zero_bit,
 * and find_next_and_bit. The differences are:
 *  - The "invert" argument, which is XORed with each fetched word before
 *    searching it for set bits.
 *  - The optional "addr2", which is ANDed with "addr1" if present.
 */
      static unsigned long _find_next_bit(const unsigned long *addr1,
                      const unsigned long *addr2, unsigned long nbits,
                      unsigned long start, unsigned long invert, unsigned long le)
      {
              unsigned long tmp, mask;
      
	if (unlikely(start >= nbits))
		return nbits;

	tmp = addr1[start / BITS_PER_LONG];
	if (addr2)
		tmp &= addr2[start / BITS_PER_LONG];
	tmp ^= invert;
      
              /* Handle 1st word. */
              mask = BITMAP_FIRST_WORD_MASK(start);
              if (le)
                      mask = swab(mask);
      
              tmp &= mask;
      
              start = round_down(start, BITS_PER_LONG);
      
	while (!tmp) {
		start += BITS_PER_LONG;
		if (start >= nbits)
			return nbits;

		tmp = addr1[start / BITS_PER_LONG];
		if (addr2)
			tmp &= addr2[start / BITS_PER_LONG];
		tmp ^= invert;
              }
      
              if (le)
                      tmp = swab(tmp);
      
	return min(start + __ffs(tmp), nbits);
      }
      #endif
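
/*
 * Worked example of the "invert" trick above: for find_next_zero_bit the
 * caller passes invert == ~0UL, so a fetched word such as 0b0111 becomes
 * 0b...1000 after the XOR; __ffs() then returns 3, which is indeed the
 * index of the first zero bit in the original word.  One loop therefore
 * serves both the "next set bit" and "next zero bit" searches.
 */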
      
      #ifndef find_next_bit
      /*
       * Find the next set bit in a memory region.
       */
      unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
                                  unsigned long offset)
      {
	return _find_next_bit(addr, NULL, size, offset, 0UL, 0);
      }
      EXPORT_SYMBOL(find_next_bit);
      #endif
      
      #ifndef find_next_zero_bit
      unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
                                       unsigned long offset)
      {
	return _find_next_bit(addr, NULL, size, offset, ~0UL, 0);
      }
      EXPORT_SYMBOL(find_next_zero_bit);
      #endif
      
      #if !defined(find_next_and_bit)
      unsigned long find_next_and_bit(const unsigned long *addr1,
                      const unsigned long *addr2, unsigned long size,
                      unsigned long offset)
      {
	return _find_next_bit(addr1, addr2, size, offset, 0UL, 0);
      }
      EXPORT_SYMBOL(find_next_and_bit);
      #endif
      
      #ifndef find_first_bit
      /*
       * Find the first set bit in a memory region.
       */
      unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
      {
              unsigned long idx;
      
	for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
		if (addr[idx])
			return min(idx * BITS_PER_LONG + __ffs(addr[idx]), size);
              }
      
              return size;
      }
      EXPORT_SYMBOL(find_first_bit);
      #endif
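
/*
 * Usage sketch (illustrative only): walking every set bit in a bitmap with
 * the helpers above; "map" is a hypothetical 128-bit bitmap.
 *
 *	DECLARE_BITMAP(map, 128);
 *	unsigned long bit;
 *
 *	for (bit = find_first_bit(map, 128);
 *	     bit < 128;
 *	     bit = find_next_bit(map, 128, bit + 1))
 *		pr_info("bit %lu is set\n", bit);
 *
 * This is the pattern the for_each_set_bit() helper wraps.
 */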
      
      #ifndef find_first_zero_bit
      /*
       * Find the first cleared bit in a memory region.
       */
      unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
      {
              unsigned long idx;
      
	for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
		if (addr[idx] != ~0UL)
			return min(idx * BITS_PER_LONG + ffz(addr[idx]), size);
              }
      
              return size;
      }
      EXPORT_SYMBOL(find_first_zero_bit);
      #endif
      
      #ifndef find_last_bit
      unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
      {
              if (size) {
                      unsigned long val = BITMAP_LAST_WORD_MASK(size);
                      unsigned long idx = (size-1) / BITS_PER_LONG;
      
                      do {
                              val &= addr[idx];
                              if (val)
                                      return idx * BITS_PER_LONG + __fls(val);
      
                              val = ~0ul;
                      } while (idx--);
              }
              return size;
      }
      EXPORT_SYMBOL(find_last_bit);
      #endif
      
      #ifdef __BIG_ENDIAN
      
      #ifndef find_next_zero_bit_le
unsigned long find_next_zero_bit_le(const void *addr, unsigned long size,
				    unsigned long offset)
      {
              return _find_next_bit(addr, NULL, size, offset, ~0UL, 1);
      }
      EXPORT_SYMBOL(find_next_zero_bit_le);
      #endif
      
      #ifndef find_next_bit_le
unsigned long find_next_bit_le(const void *addr, unsigned long size,
			       unsigned long offset)
      {
              return _find_next_bit(addr, NULL, size, offset, 0UL, 1);
      }
      EXPORT_SYMBOL(find_next_bit_le);
      #endif
      
      #endif /* __BIG_ENDIAN */
      
      unsigned long find_next_clump8(unsigned long *clump, const unsigned long *addr,
                                     unsigned long size, unsigned long offset)
      {
              offset = find_next_bit(addr, size, offset);
              if (offset == size)
                      return size;
      
              offset = round_down(offset, 8);
              *clump = bitmap_get_value8(addr, offset);
      
              return offset;
      }
      EXPORT_SYMBOL(find_next_clump8);
      // SPDX-License-Identifier: GPL-2.0-only
      #include <linux/kdebug.h>
      #include <linux/kprobes.h>
      #include <linux/export.h>
      #include <linux/notifier.h>
      #include <linux/rcupdate.h>
      #include <linux/vmalloc.h>
      #include <linux/reboot.h>
      
      /*
       *        Notifier list for kernel code which wants to be called
       *        at shutdown. This is used to stop any idling DMA operations
       *        and the like.
       */
      BLOCKING_NOTIFIER_HEAD(reboot_notifier_list);
      
      /*
       *        Notifier chain core routines.  The exported routines below
       *        are layered on top of these, with appropriate locking added.
       */
      
      static int notifier_chain_register(struct notifier_block **nl,
                      struct notifier_block *n)
      {
              while ((*nl) != NULL) {
                      if (unlikely((*nl) == n)) {
                              WARN(1, "double register detected");
                              return 0;
                      }
                      if (n->priority > (*nl)->priority)
                              break;
                      nl = &((*nl)->next);
              }
              n->next = *nl;
              rcu_assign_pointer(*nl, n);
              return 0;
      }
      
      static int notifier_chain_unregister(struct notifier_block **nl,
                      struct notifier_block *n)
      {
              while ((*nl) != NULL) {
                      if ((*nl) == n) {
                              rcu_assign_pointer(*nl, n->next);
                              return 0;
                      }
                      nl = &((*nl)->next);
              }
              return -ENOENT;
      }
      
/**
 * notifier_call_chain - Informs the registered notifiers about an event.
 *	@nl:		Pointer to head of the blocking notifier chain
 *	@val:		Value passed unmodified to notifier function
 *	@v:		Pointer passed unmodified to notifier function
 *	@nr_to_call:	Number of notifier functions to be called; pass -1
 *			to call all of them.
 *	@nr_calls:	Records the number of notifications sent; pass NULL
 *			if this count is not needed.
 *	@returns:	notifier_call_chain returns the value returned by the
 *			last notifier function called.
 */
      static int notifier_call_chain(struct notifier_block **nl,
                                     unsigned long val, void *v,
                                     int nr_to_call, int *nr_calls)
      {
              int ret = NOTIFY_DONE;
              struct notifier_block *nb, *next_nb;
      
	nb = rcu_dereference_raw(*nl);
      
              while (nb && nr_to_call) {
                      next_nb = rcu_dereference_raw(nb->next);
      
      #ifdef CONFIG_DEBUG_NOTIFIERS
                      if (unlikely(!func_ptr_is_kernel_text(nb->notifier_call))) {
                              WARN(1, "Invalid notifier called!");
                              nb = next_nb;
                              continue;
                      }
      #endif
                      ret = nb->notifier_call(nb, val, v);
      
                      if (nr_calls)
                              (*nr_calls)++;
      
                      if (ret & NOTIFY_STOP_MASK)
                              break;
                      nb = next_nb;
                      nr_to_call--;
              }
	return ret;
      }
      NOKPROBE_SYMBOL(notifier_call_chain);
      
      /**
       * notifier_call_chain_robust - Inform the registered notifiers about an event
       *                              and rollback on error.
       * @nl:                Pointer to head of the blocking notifier chain
       * @val_up:        Value passed unmodified to the notifier function
       * @val_down:        Value passed unmodified to the notifier function when recovering
       *              from an error on @val_up
 * @v:		Pointer passed unmodified to the notifier function
       *
       * NOTE:        It is important the @nl chain doesn't change between the two
       *                invocations of notifier_call_chain() such that we visit the
       *                exact same notifier callbacks; this rules out any RCU usage.
       *
       * Returns:        the return value of the @val_up call.
       */
      static int notifier_call_chain_robust(struct notifier_block **nl,
                                           unsigned long val_up, unsigned long val_down,
                                           void *v)
      {
              int ret, nr = 0;
      
              ret = notifier_call_chain(nl, val_up, v, -1, &nr);
              if (ret & NOTIFY_STOP_MASK)
                      notifier_call_chain(nl, val_down, v, nr-1, NULL);
      
              return ret;
      }
      
      /*
       *        Atomic notifier chain routines.  Registration and unregistration
       *        use a spinlock, and call_chain is synchronized by RCU (no locks).
       */
      
      /**
       *        atomic_notifier_chain_register - Add notifier to an atomic notifier chain
       *        @nh: Pointer to head of the atomic notifier chain
       *        @n: New entry in notifier chain
       *
       *        Adds a notifier to an atomic notifier chain.
       *
       *        Currently always returns zero.
       */
      int atomic_notifier_chain_register(struct atomic_notifier_head *nh,
                      struct notifier_block *n)
      {
              unsigned long flags;
              int ret;
      
              spin_lock_irqsave(&nh->lock, flags);
              ret = notifier_chain_register(&nh->head, n);
              spin_unlock_irqrestore(&nh->lock, flags);
              return ret;
      }
      EXPORT_SYMBOL_GPL(atomic_notifier_chain_register);
      
      /**
       *        atomic_notifier_chain_unregister - Remove notifier from an atomic notifier chain
       *        @nh: Pointer to head of the atomic notifier chain
       *        @n: Entry to remove from notifier chain
       *
       *        Removes a notifier from an atomic notifier chain.
       *
       *        Returns zero on success or %-ENOENT on failure.
       */
      int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh,
                      struct notifier_block *n)
      {
              unsigned long flags;
              int ret;
      
              spin_lock_irqsave(&nh->lock, flags);
              ret = notifier_chain_unregister(&nh->head, n);
              spin_unlock_irqrestore(&nh->lock, flags);
              synchronize_rcu();
              return ret;
      }
      EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister);
      
      int atomic_notifier_call_chain_robust(struct atomic_notifier_head *nh,
                      unsigned long val_up, unsigned long val_down, void *v)
      {
              unsigned long flags;
              int ret;
      
	/*
	 * Mustn't use RCU here, because then the notifier list could
	 * change between the up and down traversals.
	 */
              spin_lock_irqsave(&nh->lock, flags);
              ret = notifier_call_chain_robust(&nh->head, val_up, val_down, v);
              spin_unlock_irqrestore(&nh->lock, flags);
      
              return ret;
      }
      EXPORT_SYMBOL_GPL(atomic_notifier_call_chain_robust);
      NOKPROBE_SYMBOL(atomic_notifier_call_chain_robust);
      
      /**
       *        atomic_notifier_call_chain - Call functions in an atomic notifier chain
       *        @nh: Pointer to head of the atomic notifier chain
       *        @val: Value passed unmodified to notifier function
       *        @v: Pointer passed unmodified to notifier function
       *
       *        Calls each function in a notifier chain in turn.  The functions
       *        run in an atomic context, so they must not block.
       *        This routine uses RCU to synchronize with changes to the chain.
       *
       *        If the return value of the notifier can be and'ed
       *        with %NOTIFY_STOP_MASK then atomic_notifier_call_chain()
       *        will return immediately, with the return value of
       *        the notifier function which halted execution.
       *        Otherwise the return value is the return value
       *        of the last notifier function called.
       */
      int atomic_notifier_call_chain(struct atomic_notifier_head *nh,
                                     unsigned long val, void *v)
      {
              int ret;
      
	rcu_read_lock();
	ret = notifier_call_chain(&nh->head, val, v, -1, NULL);
	rcu_read_unlock();
      
              return ret;
      }
      EXPORT_SYMBOL_GPL(atomic_notifier_call_chain);
      NOKPROBE_SYMBOL(atomic_notifier_call_chain);
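
/*
 * Usage sketch (illustrative only; "my_chain", "my_event_cb", "my_nb" and
 * MY_EVENT are hypothetical):
 *
 *	static int my_event_cb(struct notifier_block *nb,
 *			       unsigned long action, void *data)
 *	{
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_nb = {
 *		.notifier_call	= my_event_cb,
 *		.priority	= 0,
 *	};
 *
 *	static ATOMIC_NOTIFIER_HEAD(my_chain);
 *
 *	atomic_notifier_chain_register(&my_chain, &my_nb);
 *	...
 *	// may be called from atomic context; callbacks must not block
 *	atomic_notifier_call_chain(&my_chain, MY_EVENT, NULL);
 *	...
 *	atomic_notifier_chain_unregister(&my_chain, &my_nb);
 */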
      
      /*
       *        Blocking notifier chain routines.  All access to the chain is
       *        synchronized by an rwsem.
       */
      
      /**
       *        blocking_notifier_chain_register - Add notifier to a blocking notifier chain
       *        @nh: Pointer to head of the blocking notifier chain
       *        @n: New entry in notifier chain
       *
       *        Adds a notifier to a blocking notifier chain.
       *        Must be called in process context.
       *
       *        Currently always returns zero.
       */
      int blocking_notifier_chain_register(struct blocking_notifier_head *nh,
                      struct notifier_block *n)
      {
              int ret;
      
              /*
               * This code gets used during boot-up, when task switching is
               * not yet working and interrupts must remain disabled.  At
               * such times we must not call down_write().
               */
              if (unlikely(system_state == SYSTEM_BOOTING))
                      return notifier_chain_register(&nh->head, n);
      
              down_write(&nh->rwsem);
              ret = notifier_chain_register(&nh->head, n);
              up_write(&nh->rwsem);
              return ret;
      }
      EXPORT_SYMBOL_GPL(blocking_notifier_chain_register);
      
      /**
       *        blocking_notifier_chain_unregister - Remove notifier from a blocking notifier chain
       *        @nh: Pointer to head of the blocking notifier chain
       *        @n: Entry to remove from notifier chain
       *
       *        Removes a notifier from a blocking notifier chain.
       *        Must be called from process context.
       *
       *        Returns zero on success or %-ENOENT on failure.
       */
      int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh,
                      struct notifier_block *n)
      {
              int ret;
      
              /*
               * This code gets used during boot-up, when task switching is
               * not yet working and interrupts must remain disabled.  At
               * such times we must not call down_write().
               */
              if (unlikely(system_state == SYSTEM_BOOTING))
                      return notifier_chain_unregister(&nh->head, n);
      
              down_write(&nh->rwsem);
              ret = notifier_chain_unregister(&nh->head, n);
              up_write(&nh->rwsem);
              return ret;
      }
      EXPORT_SYMBOL_GPL(blocking_notifier_chain_unregister);
      
      int blocking_notifier_call_chain_robust(struct blocking_notifier_head *nh,
                      unsigned long val_up, unsigned long val_down, void *v)
      {
              int ret = NOTIFY_DONE;
      
	/*
	 * We check the head outside the lock, but if this access is
	 * racy then it does not matter what the result of the test
	 * is; we re-check the list after having taken the lock anyway.
	 */
              if (rcu_access_pointer(nh->head)) {
                      down_read(&nh->rwsem);
                      ret = notifier_call_chain_robust(&nh->head, val_up, val_down, v);
                      up_read(&nh->rwsem);
              }
              return ret;
      }
      EXPORT_SYMBOL_GPL(blocking_notifier_call_chain_robust);
      
      /**
       *        blocking_notifier_call_chain - Call functions in a blocking notifier chain
       *        @nh: Pointer to head of the blocking notifier chain
       *        @val: Value passed unmodified to notifier function
       *        @v: Pointer passed unmodified to notifier function
       *
       *        Calls each function in a notifier chain in turn.  The functions
       *        run in a process context, so they are allowed to block.
       *
       *        If the return value of the notifier can be and'ed
       *        with %NOTIFY_STOP_MASK then blocking_notifier_call_chain()
       *        will return immediately, with the return value of
       *        the notifier function which halted execution.
       *        Otherwise the return value is the return value
       *        of the last notifier function called.
       */
      int blocking_notifier_call_chain(struct blocking_notifier_head *nh,
                      unsigned long val, void *v)
      {
              int ret = NOTIFY_DONE;
      
	/*
	 * We check the head outside the lock, but if this access is
	 * racy then it does not matter what the result of the test
	 * is; we re-check the list after having taken the lock anyway.
	 */
              if (rcu_access_pointer(nh->head)) {
                      down_read(&nh->rwsem);
                      ret = notifier_call_chain(&nh->head, val, v, -1, NULL);
                      up_read(&nh->rwsem);
              }
              return ret;
      }
      EXPORT_SYMBOL_GPL(blocking_notifier_call_chain);
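
/*
 * Usage sketch (illustrative only; the names are hypothetical, and "my_nb"
 * is a notifier_block as in the atomic sketch above).  Blocking chains are
 * for callers and callbacks that may sleep:
 *
 *	static BLOCKING_NOTIFIER_HEAD(my_blocking_chain);
 *
 *	blocking_notifier_chain_register(&my_blocking_chain, &my_nb);
 *	...
 *	// process context only; callbacks are allowed to block
 *	blocking_notifier_call_chain(&my_blocking_chain, MY_EVENT, NULL);
 */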
      
      /*
       *        Raw notifier chain routines.  There is no protection;
       *        the caller must provide it.  Use at your own risk!
       */
      
      /**
       *        raw_notifier_chain_register - Add notifier to a raw notifier chain
       *        @nh: Pointer to head of the raw notifier chain
       *        @n: New entry in notifier chain
       *
       *        Adds a notifier to a raw notifier chain.
       *        All locking must be provided by the caller.
       *
       *        Currently always returns zero.
       */
      int raw_notifier_chain_register(struct raw_notifier_head *nh,
                      struct notifier_block *n)
      {
              return notifier_chain_register(&nh->head, n);
      }
      EXPORT_SYMBOL_GPL(raw_notifier_chain_register);
      
      /**
       *        raw_notifier_chain_unregister - Remove notifier from a raw notifier chain
       *        @nh: Pointer to head of the raw notifier chain
       *        @n: Entry to remove from notifier chain
       *
       *        Removes a notifier from a raw notifier chain.
       *        All locking must be provided by the caller.
       *
       *        Returns zero on success or %-ENOENT on failure.
       */
      int raw_notifier_chain_unregister(struct raw_notifier_head *nh,
                      struct notifier_block *n)
      {
              return notifier_chain_unregister(&nh->head, n);
      }
      EXPORT_SYMBOL_GPL(raw_notifier_chain_unregister);
      
      int raw_notifier_call_chain_robust(struct raw_notifier_head *nh,
                      unsigned long val_up, unsigned long val_down, void *v)
      {
              return notifier_call_chain_robust(&nh->head, val_up, val_down, v);
      }
      EXPORT_SYMBOL_GPL(raw_notifier_call_chain_robust);
      
      /**
       *        raw_notifier_call_chain - Call functions in a raw notifier chain
       *        @nh: Pointer to head of the raw notifier chain
       *        @val: Value passed unmodified to notifier function
       *        @v: Pointer passed unmodified to notifier function
       *
       *        Calls each function in a notifier chain in turn.  The functions
       *        run in an undefined context.
       *        All locking must be provided by the caller.
       *
       *        If the return value of the notifier can be and'ed
       *        with %NOTIFY_STOP_MASK then raw_notifier_call_chain()
       *        will return immediately, with the return value of
       *        the notifier function which halted execution.
       *        Otherwise the return value is the return value
       *        of the last notifier function called.
       */
      int raw_notifier_call_chain(struct raw_notifier_head *nh,
                      unsigned long val, void *v)
      {
              return notifier_call_chain(&nh->head, val, v, -1, NULL);
      }
      EXPORT_SYMBOL_GPL(raw_notifier_call_chain);
      
      #ifdef CONFIG_SRCU
      /*
       *        SRCU notifier chain routines.    Registration and unregistration
       *        use a mutex, and call_chain is synchronized by SRCU (no locks).
       */
      
      /**
       *        srcu_notifier_chain_register - Add notifier to an SRCU notifier chain
       *        @nh: Pointer to head of the SRCU notifier chain
       *        @n: New entry in notifier chain
       *
       *        Adds a notifier to an SRCU notifier chain.
       *        Must be called in process context.
       *
       *        Currently always returns zero.
       */
      int srcu_notifier_chain_register(struct srcu_notifier_head *nh,
                      struct notifier_block *n)
      {
              int ret;
      
              /*
               * This code gets used during boot-up, when task switching is
               * not yet working and interrupts must remain disabled.  At
               * such times we must not call mutex_lock().
               */
              if (unlikely(system_state == SYSTEM_BOOTING))
                      return notifier_chain_register(&nh->head, n);
      
              mutex_lock(&nh->mutex);
              ret = notifier_chain_register(&nh->head, n);
              mutex_unlock(&nh->mutex);
              return ret;
      }
      EXPORT_SYMBOL_GPL(srcu_notifier_chain_register);
      
      /**
       *        srcu_notifier_chain_unregister - Remove notifier from an SRCU notifier chain
       *        @nh: Pointer to head of the SRCU notifier chain
       *        @n: Entry to remove from notifier chain
       *
       *        Removes a notifier from an SRCU notifier chain.
       *        Must be called from process context.
       *
       *        Returns zero on success or %-ENOENT on failure.
       */
      int srcu_notifier_chain_unregister(struct srcu_notifier_head *nh,
                      struct notifier_block *n)
      {
              int ret;
      
              /*
               * This code gets used during boot-up, when task switching is
               * not yet working and interrupts must remain disabled.  At
               * such times we must not call mutex_lock().
               */
              if (unlikely(system_state == SYSTEM_BOOTING))
                      return notifier_chain_unregister(&nh->head, n);
      
              mutex_lock(&nh->mutex);
              ret = notifier_chain_unregister(&nh->head, n);
              mutex_unlock(&nh->mutex);
              synchronize_srcu(&nh->srcu);
              return ret;
      }
      EXPORT_SYMBOL_GPL(srcu_notifier_chain_unregister);
      
      /**
       *        srcu_notifier_call_chain - Call functions in an SRCU notifier chain
       *        @nh: Pointer to head of the SRCU notifier chain
       *        @val: Value passed unmodified to notifier function
       *        @v: Pointer passed unmodified to notifier function
       *
       *        Calls each function in a notifier chain in turn.  The functions
       *        run in a process context, so they are allowed to block.
       *
       *        If the return value of the notifier can be and'ed
       *        with %NOTIFY_STOP_MASK then srcu_notifier_call_chain()
       *        will return immediately, with the return value of
       *        the notifier function which halted execution.
       *        Otherwise the return value is the return value
       *        of the last notifier function called.
       */
      int srcu_notifier_call_chain(struct srcu_notifier_head *nh,
                      unsigned long val, void *v)
      {
              int ret;
              int idx;
      
              idx = srcu_read_lock(&nh->srcu);
              ret = notifier_call_chain(&nh->head, val, v, -1, NULL);
              srcu_read_unlock(&nh->srcu, idx);
              return ret;
      }
      EXPORT_SYMBOL_GPL(srcu_notifier_call_chain);
      
      /**
       *        srcu_init_notifier_head - Initialize an SRCU notifier head
       *        @nh: Pointer to head of the srcu notifier chain
       *
       *        Unlike other sorts of notifier heads, SRCU notifier heads require
       *        dynamic initialization.  Be sure to call this routine before
       *        calling any of the other SRCU notifier routines for this head.
       *
       *        If an SRCU notifier head is deallocated, it must first be cleaned
       *        up by calling srcu_cleanup_notifier_head().  Otherwise the head's
       *        per-cpu data (used by the SRCU mechanism) will leak.
       */
      void srcu_init_notifier_head(struct srcu_notifier_head *nh)
      {
              mutex_init(&nh->mutex);
              if (init_srcu_struct(&nh->srcu) < 0)
                      BUG();
              nh->head = NULL;
      }
      EXPORT_SYMBOL_GPL(srcu_init_notifier_head);
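
       /*
        * Minimal usage sketch for the SRCU notifier API above (illustrative
        * only; "my_nh", "my_event_cb" and the event value are hypothetical,
        * not part of this file):
        *
        *        static struct srcu_notifier_head my_nh;
        *
        *        static int my_event_cb(struct notifier_block *nb,
        *                               unsigned long event, void *data)
        *        {
        *                return NOTIFY_OK;
        *        }
        *
        *        static struct notifier_block my_nb = {
        *                .notifier_call = my_event_cb,
        *        };
        *
        *        srcu_init_notifier_head(&my_nh);
        *        srcu_notifier_chain_register(&my_nh, &my_nb);
        *        srcu_notifier_call_chain(&my_nh, 0, NULL);
        *        srcu_notifier_chain_unregister(&my_nh, &my_nb);
        *        srcu_cleanup_notifier_head(&my_nh);
        */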
      
      #endif /* CONFIG_SRCU */
      
      static ATOMIC_NOTIFIER_HEAD(die_chain);
      
      int notrace notify_die(enum die_val val, const char *str,
                     struct pt_regs *regs, long err, int trap, int sig)
      {
              struct die_args args = {
                      .regs        = regs,
                      .str        = str,
                      .err        = err,
                      .trapnr        = trap,
                      .signr        = sig,
      
              };
              RCU_LOCKDEP_WARN(!rcu_is_watching(),
                                 "notify_die called but RCU thinks we're quiescent");
              return atomic_notifier_call_chain(&die_chain, val, &args);
      }
      NOKPROBE_SYMBOL(notify_die);
      
      int register_die_notifier(struct notifier_block *nb)
      {
              return atomic_notifier_chain_register(&die_chain, nb);
      }
      EXPORT_SYMBOL_GPL(register_die_notifier);
      
      int unregister_die_notifier(struct notifier_block *nb)
      {
              return atomic_notifier_chain_unregister(&die_chain, nb);
      }
      EXPORT_SYMBOL_GPL(unregister_die_notifier);
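
       /*
        * Usage sketch for the die notifier chain (illustrative; "my_die_cb"
        * and "my_die_nb" are hypothetical). A callback receives the die_args
        * structure that notify_die() builds above:
        *
        *        static int my_die_cb(struct notifier_block *nb,
        *                             unsigned long val, void *data)
        *        {
        *                struct die_args *args = data;
        *
        *                pr_info("die event %lu: %s\n", val, args->str);
        *                return NOTIFY_DONE;
        *        }
        *
        *        static struct notifier_block my_die_nb = {
        *                .notifier_call = my_die_cb,
        *        };
        *
        *        register_die_notifier(&my_die_nb);
        *        ...
        *        unregister_die_notifier(&my_die_nb);
        */
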
      // SPDX-License-Identifier: GPL-2.0
      #include <linux/export.h>
      #include <linux/lockref.h>
      
      #if USE_CMPXCHG_LOCKREF
      
      /*
       * Note that the "cmpxchg()" reloads the "old" value for the
       * failure case.
       */
      #define CMPXCHG_LOOP(CODE, SUCCESS) do {                                        \
              int retry = 100;                                                        \
              struct lockref old;                                                        \
              BUILD_BUG_ON(sizeof(old) != 8);                                                \
              old.lock_count = READ_ONCE(lockref->lock_count);                        \
              while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) {          \
                      struct lockref new = old, prev = old;                                \
                      CODE                                                                \
                      old.lock_count = cmpxchg64_relaxed(&lockref->lock_count,        \
                                                         old.lock_count,                \
                                                         new.lock_count);                \
                      if (likely(old.lock_count == prev.lock_count)) {                \
                              SUCCESS;                                                \
                      }                                                                \
                      if (!--retry)                                                        \
                              break;                                                        \
                      cpu_relax();                                                        \
              }                                                                        \
      } while (0)
      
      #else
      
      #define CMPXCHG_LOOP(CODE, SUCCESS) do { } while (0)
      
      #endif
      
      /**
       * lockref_get - Increments reference count unconditionally
       * @lockref: pointer to lockref structure
       *
       * This operation is only valid if you already hold a reference
       * to the object, so you know the count cannot be zero.
       */
      void lockref_get(struct lockref *lockref)
      {
              CMPXCHG_LOOP(
                      new.count++;
              ,
                      return;
              );
      
              spin_lock(&lockref->lock);
              lockref->count++;
              spin_unlock(&lockref->lock);
      }
      EXPORT_SYMBOL(lockref_get);
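
       /*
        * Usage sketch for struct lockref (illustrative; "struct my_obj",
        * "obj" and free_my_obj() are hypothetical). The lockref pairs a
        * spinlock with a count so the common get/put paths can avoid taking
        * the lock:
        *
        *        struct my_obj {
        *                struct lockref ref;
        *        };
        *
        *        spin_lock_init(&obj->ref.lock);
        *        obj->ref.count = 1;
        *
        *        lockref_get(&obj->ref);            // caller already holds a ref
        *        if (!lockref_put_or_lock(&obj->ref)) {
        *                // count would hit zero: the spinlock is now held and
        *                // the count is unchanged, so tear the object down
        *                lockref_mark_dead(&obj->ref);
        *                spin_unlock(&obj->ref.lock);
        *                free_my_obj(obj);
        *        }
        */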
      
      /**
       * lockref_get_not_zero - Increments count unless the count is 0 or dead
       * @lockref: pointer to lockref structure
       * Return: 1 if count updated successfully or 0 if count was zero
       */
      int lockref_get_not_zero(struct lockref *lockref)
      {
              int retval;
      
              CMPXCHG_LOOP(
                      new.count++;
                      if (old.count <= 0)
                              return 0;
              ,
                      return 1;
              );
      
               spin_lock(&lockref->lock);
               retval = 0;
               if (lockref->count > 0) {
                       lockref->count++;
                       retval = 1;
               }
               spin_unlock(&lockref->lock);
              return retval;
      }
      EXPORT_SYMBOL(lockref_get_not_zero);
      
      /**
       * lockref_put_not_zero - Decrements count unless count <= 1 before decrement
       * @lockref: pointer to lockref structure
       * Return: 1 if count updated successfully or 0 if count would become zero
       */
      int lockref_put_not_zero(struct lockref *lockref)
      {
              int retval;
      
              CMPXCHG_LOOP(
                      new.count--;
                      if (old.count <= 1)
                              return 0;
              ,
                      return 1;
              );
      
              spin_lock(&lockref->lock);
              retval = 0;
              if (lockref->count > 1) {
                      lockref->count--;
                      retval = 1;
              }
              spin_unlock(&lockref->lock);
              return retval;
      }
      EXPORT_SYMBOL(lockref_put_not_zero);
      
      /**
       * lockref_get_or_lock - Increments count unless the count is 0 or dead
       * @lockref: pointer to lockref structure
       * Return: 1 if count updated successfully or 0 if count was zero
       * and we got the lock instead.
       */
      int lockref_get_or_lock(struct lockref *lockref)
      {
              CMPXCHG_LOOP(
                      new.count++;
                      if (old.count <= 0)
                              break;
              ,
                      return 1;
              );
      
              spin_lock(&lockref->lock);
              if (lockref->count <= 0)
                      return 0;
              lockref->count++;
              spin_unlock(&lockref->lock);
              return 1;
      }
      EXPORT_SYMBOL(lockref_get_or_lock);
      
      /**
       * lockref_put_return - Decrement reference count if possible
       * @lockref: pointer to lockref structure
       *
       * Decrement the reference count and return the new value.
       * If the lockref was dead or locked, return an error.
       */
      int lockref_put_return(struct lockref *lockref)
      {
              CMPXCHG_LOOP(
                      new.count--;
                      if (old.count <= 0)
                              return -1;
              ,
                      return new.count;
              );
               return -1;
      }
      EXPORT_SYMBOL(lockref_put_return);
      
      /**
       * lockref_put_or_lock - decrements count unless count <= 1 before decrement
       * @lockref: pointer to lockref structure
       * Return: 1 if count updated successfully or 0 if count <= 1 and lock taken
       */
      int lockref_put_or_lock(struct lockref *lockref)
      {
              CMPXCHG_LOOP(
                      new.count--;
                      if (old.count <= 1)
                              break;
              ,
                      return 1;
              );
      
               spin_lock(&lockref->lock);
               if (lockref->count <= 1)
                       return 0;
               lockref->count--;
               spin_unlock(&lockref->lock);
               return 1;
      }
      EXPORT_SYMBOL(lockref_put_or_lock);
      
      /**
       * lockref_mark_dead - mark lockref dead
       * @lockref: pointer to lockref structure
       */
      void lockref_mark_dead(struct lockref *lockref)
      {
               assert_spin_locked(&lockref->lock);
               lockref->count = -128;
      }
      EXPORT_SYMBOL(lockref_mark_dead);
      
      /**
       * lockref_get_not_dead - Increments count unless the ref is dead
       * @lockref: pointer to lockref structure
       * Return: 1 if count updated successfully or 0 if lockref was dead
       */
      int lockref_get_not_dead(struct lockref *lockref)
      {
              int retval;
      
              CMPXCHG_LOOP(
                      new.count++;
                      if (old.count < 0)
                              return 0;
              ,
                      return 1;
              );
      
               spin_lock(&lockref->lock);
               retval = 0;
               if (lockref->count >= 0) {
                       lockref->count++;
                       retval = 1;
               }
               spin_unlock(&lockref->lock);
              return retval;
      }
      EXPORT_SYMBOL(lockref_get_not_dead);
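
       /*
        * Sketch of the "dead" protocol implemented by lockref_mark_dead()
        * and lockref_get_not_dead() (illustrative; the lookup table helpers
        * and "obj" are hypothetical). Teardown marks the ref dead under the
        * lock; lockless lookups then refuse to resurrect the object:
        *
        *        // teardown path
        *        spin_lock(&obj->ref.lock);
        *        lockref_mark_dead(&obj->ref);
        *        remove_from_lookup_table(obj);
        *        spin_unlock(&obj->ref.lock);
        *
        *        // lookup path
        *        obj = lookup_table_find(key);
        *        if (obj && !lockref_get_not_dead(&obj->ref))
        *                obj = NULL;                // object is being torn down
        */
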
      // SPDX-License-Identifier: GPL-2.0
      /*
       * drivers/base/core.c - core driver model code (device registration, etc)
       *
       * Copyright (c) 2002-3 Patrick Mochel
       * Copyright (c) 2002-3 Open Source Development Labs
       * Copyright (c) 2006 Greg Kroah-Hartman <gregkh@suse.de>
       * Copyright (c) 2006 Novell, Inc.
       */
      
      #include <linux/acpi.h>
      #include <linux/cpufreq.h>
      #include <linux/device.h>
      #include <linux/err.h>
      #include <linux/fwnode.h>
      #include <linux/init.h>
      #include <linux/module.h>
      #include <linux/slab.h>
      #include <linux/string.h>
      #include <linux/kdev_t.h>
      #include <linux/notifier.h>
      #include <linux/of.h>
      #include <linux/of_device.h>
      #include <linux/genhd.h>
      #include <linux/mutex.h>
      #include <linux/pm_runtime.h>
      #include <linux/netdevice.h>
      #include <linux/sched/signal.h>
      #include <linux/sched/mm.h>
      #include <linux/sysfs.h>
      
      #include "base.h"
      #include "power/power.h"
      
      #ifdef CONFIG_SYSFS_DEPRECATED
      #ifdef CONFIG_SYSFS_DEPRECATED_V2
      long sysfs_deprecated = 1;
      #else
      long sysfs_deprecated = 0;
      #endif
      static int __init sysfs_deprecated_setup(char *arg)
      {
              return kstrtol(arg, 10, &sysfs_deprecated);
      }
      early_param("sysfs.deprecated", sysfs_deprecated_setup);
      #endif
      
      /* Device links support. */
      static LIST_HEAD(deferred_sync);
      static unsigned int defer_sync_state_count = 1;
      static DEFINE_MUTEX(fwnode_link_lock);
      static bool fw_devlink_is_permissive(void);
      
      /**
       * fwnode_link_add - Create a link between two fwnode_handles.
       * @con: Consumer end of the link.
       * @sup: Supplier end of the link.
       *
       * Create a fwnode link between fwnode handles @con and @sup. The fwnode link
       * represents the detail that the firmware lists @sup fwnode as supplying a
       * resource to @con.
       *
       * The driver core will use the fwnode link to create a device link between the
       * two device objects corresponding to @con and @sup when they are created. The
       * driver core will automatically delete the fwnode link between @con and @sup
       * after doing that.
       *
       * Attempts to create duplicate links between the same pair of fwnode handles
       * are ignored and there is no reference counting.
       */
      int fwnode_link_add(struct fwnode_handle *con, struct fwnode_handle *sup)
      {
              struct fwnode_link *link;
              int ret = 0;
      
              mutex_lock(&fwnode_link_lock);
      
              list_for_each_entry(link, &sup->consumers, s_hook)
                      if (link->consumer == con)
                              goto out;
      
              link = kzalloc(sizeof(*link), GFP_KERNEL);
              if (!link) {
                      ret = -ENOMEM;
                      goto out;
              }
      
              link->supplier = sup;
              INIT_LIST_HEAD(&link->s_hook);
              link->consumer = con;
              INIT_LIST_HEAD(&link->c_hook);
      
              list_add(&link->s_hook, &sup->consumers);
              list_add(&link->c_hook, &con->suppliers);
      out:
              mutex_unlock(&fwnode_link_lock);
      
              return ret;
      }
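
       /*
        * Usage sketch (illustrative; "con_node" and "sup_node" stand for
        * fwnode handles that a firmware parser has already resolved, for
        * instance from a consumer node property pointing at its supplier):
        *
        *        struct fwnode_handle *con_node, *sup_node;
        *
        *        if (fwnode_link_add(con_node, sup_node))
        *                pr_debug("could not record %pfw -> %pfw dependency\n",
        *                         con_node, sup_node);
        */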
      
      /**
       * fwnode_links_purge_suppliers - Delete all supplier links of fwnode_handle.
       * @fwnode: fwnode whose supplier links need to be deleted
       *
       * Deletes all supplier links connecting directly to @fwnode.
       */
      static void fwnode_links_purge_suppliers(struct fwnode_handle *fwnode)
      {
              struct fwnode_link *link, *tmp;
      
              mutex_lock(&fwnode_link_lock);
              list_for_each_entry_safe(link, tmp, &fwnode->suppliers, c_hook) {
                      list_del(&link->s_hook);
                      list_del(&link->c_hook);
                      kfree(link);
              }
              mutex_unlock(&fwnode_link_lock);
      }
      
      /**
       * fwnode_links_purge_consumers - Delete all consumer links of fwnode_handle.
       * @fwnode: fwnode whose consumer links need to be deleted
       *
       * Deletes all consumer links connecting directly to @fwnode.
       */
      static void fwnode_links_purge_consumers(struct fwnode_handle *fwnode)
      {
              struct fwnode_link *link, *tmp;
      
              mutex_lock(&fwnode_link_lock);
              list_for_each_entry_safe(link, tmp, &fwnode->consumers, s_hook) {
                      list_del(&link->s_hook);
                      list_del(&link->c_hook);
                      kfree(link);
              }
              mutex_unlock(&fwnode_link_lock);
      }
      
      /**
       * fwnode_links_purge - Delete all links connected to a fwnode_handle.
       * @fwnode: fwnode whose links needs to be deleted
       *
       * Deletes all links connecting directly to a fwnode.
       */
      void fwnode_links_purge(struct fwnode_handle *fwnode)
      {
              fwnode_links_purge_suppliers(fwnode);
              fwnode_links_purge_consumers(fwnode);
      }
      
      #ifdef CONFIG_SRCU
      static DEFINE_MUTEX(device_links_lock);
      DEFINE_STATIC_SRCU(device_links_srcu);
      
      static inline void device_links_write_lock(void)
      {
              mutex_lock(&device_links_lock);
      }
      
      static inline void device_links_write_unlock(void)
      {
              mutex_unlock(&device_links_lock);
      }
      
      int device_links_read_lock(void) __acquires(&device_links_srcu)
      {
              return srcu_read_lock(&device_links_srcu);
      }
      
      void device_links_read_unlock(int idx) __releases(&device_links_srcu)
      {
              srcu_read_unlock(&device_links_srcu, idx);
      }
      
      int device_links_read_lock_held(void)
      {
              return srcu_read_lock_held(&device_links_srcu);
      }
      #else /* !CONFIG_SRCU */
      static DECLARE_RWSEM(device_links_lock);
      
      static inline void device_links_write_lock(void)
      {
              down_write(&device_links_lock);
      }
      
      static inline void device_links_write_unlock(void)
      {
              up_write(&device_links_lock);
      }
      
      int device_links_read_lock(void)
      {
              down_read(&device_links_lock);
              return 0;
      }
      
      void device_links_read_unlock(int not_used)
      {
              up_read(&device_links_lock);
      }
      
      #ifdef CONFIG_DEBUG_LOCK_ALLOC
      int device_links_read_lock_held(void)
      {
              return lockdep_is_held(&device_links_lock);
      }
      #endif
      #endif /* !CONFIG_SRCU */
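
       /*
        * Sketch of the read-side pattern these helpers implement
        * (illustrative; "dev" and do_something() are hypothetical). The
        * returned index must be handed back to device_links_read_unlock()
        * so the SRCU variant can close the matching read-side section:
        *
        *        struct device_link *link;
        *        int idx;
        *
        *        idx = device_links_read_lock();
        *        list_for_each_entry_rcu(link, &dev->links.consumers, s_node,
        *                                device_links_read_lock_held())
        *                do_something(link);
        *        device_links_read_unlock(idx);
        */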
      
      /**
       * device_is_dependent - Check if one device depends on another one
       * @dev: Device to check dependencies for.
       * @target: Device to check against.
       *
       * Check if @target depends on @dev or any device dependent on it (its child or
       * its consumer etc).  Return 1 if that is the case or 0 otherwise.
       */
      int device_is_dependent(struct device *dev, void *target)
      {
              struct device_link *link;
              int ret;
      
              if (dev == target)
                      return 1;
      
              ret = device_for_each_child(dev, target, device_is_dependent);
              if (ret)
                      return ret;
      
              list_for_each_entry(link, &dev->links.consumers, s_node) {
                      if (link->flags == (DL_FLAG_SYNC_STATE_ONLY | DL_FLAG_MANAGED))
                              continue;
      
                      if (link->consumer == target)
                              return 1;
      
                      ret = device_is_dependent(link->consumer, target);
                      if (ret)
                              break;
              }
              return ret;
      }
      
      static void device_link_init_status(struct device_link *link,
                                          struct device *consumer,
                                          struct device *supplier)
      {
              switch (supplier->links.status) {
              case DL_DEV_PROBING:
                      switch (consumer->links.status) {
                      case DL_DEV_PROBING:
                              /*
                               * A consumer driver can create a link to a supplier
                               * that has not completed its probing yet as long as it
                               * knows that the supplier is already functional (for
                               * example, it has just acquired some resources from the
                               * supplier).
                               */
                              link->status = DL_STATE_CONSUMER_PROBE;
                              break;
                      default:
                              link->status = DL_STATE_DORMANT;
                              break;
                      }
                      break;
              case DL_DEV_DRIVER_BOUND:
                      switch (consumer->links.status) {
                      case DL_DEV_PROBING:
                              link->status = DL_STATE_CONSUMER_PROBE;
                              break;
                      case DL_DEV_DRIVER_BOUND:
                              link->status = DL_STATE_ACTIVE;
                              break;
                      default:
                              link->status = DL_STATE_AVAILABLE;
                              break;
                      }
                      break;
              case DL_DEV_UNBINDING:
                      link->status = DL_STATE_SUPPLIER_UNBIND;
                      break;
              default:
                      link->status = DL_STATE_DORMANT;
                      break;
              }
      }
      
      static int device_reorder_to_tail(struct device *dev, void *not_used)
      {
              struct device_link *link;
      
              /*
               * Devices that have not been registered yet will be put to the ends
               * of the lists during the registration, so skip them here.
               */
              if (device_is_registered(dev))
                      devices_kset_move_last(dev);
      
              if (device_pm_initialized(dev))
                      device_pm_move_last(dev);
      
              device_for_each_child(dev, NULL, device_reorder_to_tail);
              list_for_each_entry(link, &dev->links.consumers, s_node) {
                      if (link->flags == (DL_FLAG_SYNC_STATE_ONLY | DL_FLAG_MANAGED))
                              continue;
                      device_reorder_to_tail(link->consumer, NULL);
              }
      
              return 0;
      }
      
      /**
       * device_pm_move_to_tail - Move set of devices to the end of device lists
       * @dev: Device to move
       *
       * This is a device_reorder_to_tail() wrapper taking the requisite locks.
       *
       * It moves the @dev along with all of its children and all of its consumers
       * to the ends of the device_kset and dpm_list, recursively.
       */
      void device_pm_move_to_tail(struct device *dev)
      {
              int idx;
      
              idx = device_links_read_lock();
              device_pm_lock();
              device_reorder_to_tail(dev, NULL);
              device_pm_unlock();
              device_links_read_unlock(idx);
      }
      
      #define to_devlink(dev)        container_of((dev), struct device_link, link_dev)
      
      static ssize_t status_show(struct device *dev,
                                 struct device_attribute *attr, char *buf)
      {
              const char *output;
      
              switch (to_devlink(dev)->status) {
              case DL_STATE_NONE:
                      output = "not tracked";
                      break;
              case DL_STATE_DORMANT:
                      output = "dormant";
                      break;
              case DL_STATE_AVAILABLE:
                      output = "available";
                      break;
              case DL_STATE_CONSUMER_PROBE:
                      output = "consumer probing";
                      break;
              case DL_STATE_ACTIVE:
                      output = "active";
                      break;
              case DL_STATE_SUPPLIER_UNBIND:
                      output = "supplier unbinding";
                      break;
              default:
                      output = "unknown";
                      break;
              }
      
              return sysfs_emit(buf, "%s\n", output);
      }
      static DEVICE_ATTR_RO(status);
      
      static ssize_t auto_remove_on_show(struct device *dev,
                                         struct device_attribute *attr, char *buf)
      {
              struct device_link *link = to_devlink(dev);
              const char *output;
      
              if (link->flags & DL_FLAG_AUTOREMOVE_SUPPLIER)
                      output = "supplier unbind";
              else if (link->flags & DL_FLAG_AUTOREMOVE_CONSUMER)
                      output = "consumer unbind";
              else
                      output = "never";
      
              return sysfs_emit(buf, "%s\n", output);
      }
      static DEVICE_ATTR_RO(auto_remove_on);
      
      static ssize_t runtime_pm_show(struct device *dev,
                                     struct device_attribute *attr, char *buf)
      {
              struct device_link *link = to_devlink(dev);
      
              return sysfs_emit(buf, "%d\n", !!(link->flags & DL_FLAG_PM_RUNTIME));
      }
      static DEVICE_ATTR_RO(runtime_pm);
      
      static ssize_t sync_state_only_show(struct device *dev,
                                          struct device_attribute *attr, char *buf)
      {
              struct device_link *link = to_devlink(dev);
      
              return sysfs_emit(buf, "%d\n",
                                !!(link->flags & DL_FLAG_SYNC_STATE_ONLY));
      }
      static DEVICE_ATTR_RO(sync_state_only);
      
      static struct attribute *devlink_attrs[] = {
              &dev_attr_status.attr,
              &dev_attr_auto_remove_on.attr,
              &dev_attr_runtime_pm.attr,
              &dev_attr_sync_state_only.attr,
              NULL,
      };
      ATTRIBUTE_GROUPS(devlink);
      
      static void device_link_free(struct device_link *link)
      {
              while (refcount_dec_not_one(&link->rpm_active))
                      pm_runtime_put(link->supplier);
      
              put_device(link->consumer);
              put_device(link->supplier);
              kfree(link);
      }
      
      #ifdef CONFIG_SRCU
      static void __device_link_free_srcu(struct rcu_head *rhead)
      {
              device_link_free(container_of(rhead, struct device_link, rcu_head));
      }
      
      static void devlink_dev_release(struct device *dev)
      {
              struct device_link *link = to_devlink(dev);
      
              call_srcu(&device_links_srcu, &link->rcu_head, __device_link_free_srcu);
      }
      #else
      static void devlink_dev_release(struct device *dev)
      {
              device_link_free(to_devlink(dev));
      }
      #endif
      
      static struct class devlink_class = {
              .name = "devlink",
              .owner = THIS_MODULE,
              .dev_groups = devlink_groups,
              .dev_release = devlink_dev_release,
      };
      
      static int devlink_add_symlinks(struct device *dev,
                                      struct class_interface *class_intf)
      {
              int ret;
              size_t len;
              struct device_link *link = to_devlink(dev);
              struct device *sup = link->supplier;
              struct device *con = link->consumer;
              char *buf;
      
              len = max(strlen(dev_name(sup)), strlen(dev_name(con)));
              len += strlen("supplier:") + 1;
              buf = kzalloc(len, GFP_KERNEL);
              if (!buf)
                      return -ENOMEM;
      
              ret = sysfs_create_link(&link->link_dev.kobj, &sup->kobj, "supplier");
              if (ret)
                      goto out;
      
              ret = sysfs_create_link(&link->link_dev.kobj, &con->kobj, "consumer");
              if (ret)
                      goto err_con;
      
              snprintf(buf, len, "consumer:%s", dev_name(con));
              ret = sysfs_create_link(&sup->kobj, &link->link_dev.kobj, buf);
              if (ret)
                      goto err_con_dev;
      
              snprintf(buf, len, "supplier:%s", dev_name(sup));
              ret = sysfs_create_link(&con->kobj, &link->link_dev.kobj, buf);
              if (ret)
                      goto err_sup_dev;
      
              goto out;
      
      err_sup_dev:
              snprintf(buf, len, "consumer:%s", dev_name(con));
              sysfs_remove_link(&sup->kobj, buf);
      err_con_dev:
              sysfs_remove_link(&link->link_dev.kobj, "consumer");
      err_con:
              sysfs_remove_link(&link->link_dev.kobj, "supplier");
      out:
              kfree(buf);
              return ret;
      }
      
      static void devlink_remove_symlinks(struct device *dev,
                                         struct class_interface *class_intf)
      {
              struct device_link *link = to_devlink(dev);
              size_t len;
              struct device *sup = link->supplier;
              struct device *con = link->consumer;
              char *buf;
      
              sysfs_remove_link(&link->link_dev.kobj, "consumer");
              sysfs_remove_link(&link->link_dev.kobj, "supplier");
      
              len = max(strlen(dev_name(sup)), strlen(dev_name(con)));
              len += strlen("supplier:") + 1;
              buf = kzalloc(len, GFP_KERNEL);
              if (!buf) {
                      WARN(1, "Unable to properly free device link symlinks!\n");
                      return;
              }
      
              snprintf(buf, len, "supplier:%s", dev_name(sup));
              sysfs_remove_link(&con->kobj, buf);
              snprintf(buf, len, "consumer:%s", dev_name(con));
              sysfs_remove_link(&sup->kobj, buf);
              kfree(buf);
      }
      
      static struct class_interface devlink_class_intf = {
              .class = &devlink_class,
              .add_dev = devlink_add_symlinks,
              .remove_dev = devlink_remove_symlinks,
      };
      
      static int __init devlink_class_init(void)
      {
              int ret;
      
              ret = class_register(&devlink_class);
              if (ret)
                      return ret;
      
              ret = class_interface_register(&devlink_class_intf);
              if (ret)
                      class_unregister(&devlink_class);
      
              return ret;
      }
      postcore_initcall(devlink_class_init);
      
      #define DL_MANAGED_LINK_FLAGS (DL_FLAG_AUTOREMOVE_CONSUMER | \
                                     DL_FLAG_AUTOREMOVE_SUPPLIER | \
                                     DL_FLAG_AUTOPROBE_CONSUMER  | \
                                     DL_FLAG_SYNC_STATE_ONLY)
      
      #define DL_ADD_VALID_FLAGS (DL_MANAGED_LINK_FLAGS | DL_FLAG_STATELESS | \
                                  DL_FLAG_PM_RUNTIME | DL_FLAG_RPM_ACTIVE)
      
      /**
       * device_link_add - Create a link between two devices.
       * @consumer: Consumer end of the link.
       * @supplier: Supplier end of the link.
       * @flags: Link flags.
       *
       * The caller is responsible for the proper synchronization of the link creation
       * with runtime PM.  First, setting the DL_FLAG_PM_RUNTIME flag will cause the
       * runtime PM framework to take the link into account.  Second, if the
       * DL_FLAG_RPM_ACTIVE flag is set in addition to it, the supplier devices will
       * be forced into the active meta state and reference-counted upon the creation
       * of the link.  If DL_FLAG_PM_RUNTIME is not set, DL_FLAG_RPM_ACTIVE will be
       * ignored.
       *
       * If DL_FLAG_STATELESS is set in @flags, the caller of this function is
       * expected to release the link returned by it directly with the help of either
       * device_link_del() or device_link_remove().
       *
       * If that flag is not set, however, the caller of this function is handing the
       * management of the link over to the driver core entirely and its return value
       * can only be used to check whether or not the link is present.  In that case,
       * the DL_FLAG_AUTOREMOVE_CONSUMER and DL_FLAG_AUTOREMOVE_SUPPLIER device link
       * flags can be used to indicate to the driver core when the link can be safely
       * deleted.  Namely, setting one of them in @flags indicates to the driver core
       * that the link is not going to be used (by the given caller of this function)
       * after unbinding the consumer or supplier driver, respectively, from its
       * device, so the link can be deleted at that point.  If none of them is set,
       * the link will be maintained until one of the devices pointed to by it (either
       * the consumer or the supplier) is unregistered.
       *
       * Also, if DL_FLAG_STATELESS, DL_FLAG_AUTOREMOVE_CONSUMER and
       * DL_FLAG_AUTOREMOVE_SUPPLIER are not set in @flags (that is, a persistent
       * managed device link is being added), the DL_FLAG_AUTOPROBE_CONSUMER flag can
       * be used to request the driver core to automatically probe for a consumer
       * driver after successfully binding a driver to the supplier device.
       *
       * The combination of DL_FLAG_STATELESS and one of DL_FLAG_AUTOREMOVE_CONSUMER,
       * DL_FLAG_AUTOREMOVE_SUPPLIER, or DL_FLAG_AUTOPROBE_CONSUMER set in @flags at
       * the same time is invalid and will cause NULL to be returned upfront.
       * However, if a device link between the given @consumer and @supplier pair
       * exists already when this function is called for them, the existing link will
       * be returned regardless of its current type and status (the link's flags may
       * be modified then).  The caller of this function is then expected to treat
       * the link as though it has just been created, so (in particular) if
       * DL_FLAG_STATELESS was passed in @flags, the link needs to be released
       * explicitly when not needed any more (as stated above).
       *
       * A side effect of the link creation is re-ordering of dpm_list and the
       * devices_kset list by moving the consumer device and all devices depending
       * on it to the ends of these lists (that does not happen to devices that have
       * not been registered when this function is called).
       *
       * The supplier device is required to be registered when this function is called
       * and NULL will be returned if that is not the case.  The consumer device need
       * not be registered, however.
       */
      struct device_link *device_link_add(struct device *consumer,
                                          struct device *supplier, u32 flags)
      {
              struct device_link *link;
      
              if (!consumer || !supplier || flags & ~DL_ADD_VALID_FLAGS ||
                  (flags & DL_FLAG_STATELESS && flags & DL_MANAGED_LINK_FLAGS) ||
                  (flags & DL_FLAG_SYNC_STATE_ONLY &&
                   flags != DL_FLAG_SYNC_STATE_ONLY) ||
                  (flags & DL_FLAG_AUTOPROBE_CONSUMER &&
                   flags & (DL_FLAG_AUTOREMOVE_CONSUMER |
                            DL_FLAG_AUTOREMOVE_SUPPLIER)))
                      return NULL;
      
              if (flags & DL_FLAG_PM_RUNTIME && flags & DL_FLAG_RPM_ACTIVE) {
                      if (pm_runtime_get_sync(supplier) < 0) {
                              pm_runtime_put_noidle(supplier);
                              return NULL;
                      }
              }
      
              if (!(flags & DL_FLAG_STATELESS))
                      flags |= DL_FLAG_MANAGED;
      
              device_links_write_lock();
              device_pm_lock();
      
              /*
               * If the supplier has not been fully registered yet or there is a
               * reverse (non-SYNC_STATE_ONLY) dependency between the consumer and
               * the supplier already in the graph, return NULL. If the link is a
               * SYNC_STATE_ONLY link, we don't check for reverse dependencies
               * because it only affects sync_state() callbacks.
               */
              if (!device_pm_initialized(supplier)
                  || (!(flags & DL_FLAG_SYNC_STATE_ONLY) &&
                        device_is_dependent(consumer, supplier))) {
                      link = NULL;
                      goto out;
              }
      
              /*
               * SYNC_STATE_ONLY links are useless once a consumer device has probed.
               * So, only create it if the consumer hasn't probed yet.
               */
              if (flags & DL_FLAG_SYNC_STATE_ONLY &&
                  consumer->links.status != DL_DEV_NO_DRIVER &&
                  consumer->links.status != DL_DEV_PROBING) {
                      link = NULL;
                      goto out;
              }
      
              /*
               * DL_FLAG_AUTOREMOVE_SUPPLIER indicates that the link will be needed
               * longer than for DL_FLAG_AUTOREMOVE_CONSUMER and setting them both
               * together doesn't make sense, so prefer DL_FLAG_AUTOREMOVE_SUPPLIER.
               */
              if (flags & DL_FLAG_AUTOREMOVE_SUPPLIER)
                      flags &= ~DL_FLAG_AUTOREMOVE_CONSUMER;
      
              list_for_each_entry(link, &supplier->links.consumers, s_node) {
                      if (link->consumer != consumer)
                              continue;
      
                      if (flags & DL_FLAG_PM_RUNTIME) {
                              if (!(link->flags & DL_FLAG_PM_RUNTIME)) {
                                      pm_runtime_new_link(consumer);
                                      link->flags |= DL_FLAG_PM_RUNTIME;
                              }
                              if (flags & DL_FLAG_RPM_ACTIVE)
                                      refcount_inc(&link->rpm_active);
                      }
      
                      if (flags & DL_FLAG_STATELESS) {
                              kref_get(&link->kref);
                              if (link->flags & DL_FLAG_SYNC_STATE_ONLY &&
                                  !(link->flags & DL_FLAG_STATELESS)) {
                                      link->flags |= DL_FLAG_STATELESS;
                                      goto reorder;
                              } else {
                                      link->flags |= DL_FLAG_STATELESS;
                                      goto out;
                              }
                      }
      
                      /*
                       * If the life time of the link following from the new flags is
                       * longer than indicated by the flags of the existing link,
                       * update the existing link to stay around longer.
                       */
                      if (flags & DL_FLAG_AUTOREMOVE_SUPPLIER) {
                              if (link->flags & DL_FLAG_AUTOREMOVE_CONSUMER) {
                                      link->flags &= ~DL_FLAG_AUTOREMOVE_CONSUMER;
                                      link->flags |= DL_FLAG_AUTOREMOVE_SUPPLIER;
                              }
                      } else if (!(flags & DL_FLAG_AUTOREMOVE_CONSUMER)) {
                              link->flags &= ~(DL_FLAG_AUTOREMOVE_CONSUMER |
                                               DL_FLAG_AUTOREMOVE_SUPPLIER);
                      }
                      if (!(link->flags & DL_FLAG_MANAGED)) {
                              kref_get(&link->kref);
                              link->flags |= DL_FLAG_MANAGED;
                              device_link_init_status(link, consumer, supplier);
                      }
                      if (link->flags & DL_FLAG_SYNC_STATE_ONLY &&
                          !(flags & DL_FLAG_SYNC_STATE_ONLY)) {
                              link->flags &= ~DL_FLAG_SYNC_STATE_ONLY;
                              goto reorder;
                      }
      
                      goto out;
              }
      
              link = kzalloc(sizeof(*link), GFP_KERNEL);
              if (!link)
                      goto out;
      
              refcount_set(&link->rpm_active, 1);
      
              get_device(supplier);
              link->supplier = supplier;
              INIT_LIST_HEAD(&link->s_node);
              get_device(consumer);
              link->consumer = consumer;
              INIT_LIST_HEAD(&link->c_node);
              link->flags = flags;
              kref_init(&link->kref);
      
              link->link_dev.class = &devlink_class;
              device_set_pm_not_required(&link->link_dev);
              dev_set_name(&link->link_dev, "%s--%s",
                           dev_name(supplier), dev_name(consumer));
              if (device_register(&link->link_dev)) {
                      put_device(consumer);
                      put_device(supplier);
                      kfree(link);
                      link = NULL;
                      goto out;
              }
      
              if (flags & DL_FLAG_PM_RUNTIME) {
                      if (flags & DL_FLAG_RPM_ACTIVE)
                              refcount_inc(&link->rpm_active);
      
                      pm_runtime_new_link(consumer);
              }
      
              /* Determine the initial link state. */
              if (flags & DL_FLAG_STATELESS)
                      link->status = DL_STATE_NONE;
              else
                      device_link_init_status(link, consumer, supplier);
      
              /*
               * Some callers expect the link creation during consumer driver probe to
               * resume the supplier even without DL_FLAG_RPM_ACTIVE.
               */
              if (link->status == DL_STATE_CONSUMER_PROBE &&
                  flags & DL_FLAG_PM_RUNTIME)
                      pm_runtime_resume(supplier);
      
              list_add_tail_rcu(&link->s_node, &supplier->links.consumers);
              list_add_tail_rcu(&link->c_node, &consumer->links.suppliers);
      
              if (flags & DL_FLAG_SYNC_STATE_ONLY) {
                      dev_dbg(consumer,
                              "Linked as a sync state only consumer to %s\n",
                              dev_name(supplier));
                      goto out;
              }
      
      reorder:
              /*
               * Move the consumer and all of the devices depending on it to the end
               * of dpm_list and the devices_kset list.
               *
               * It is necessary to hold dpm_list locked throughout all that or else
               * we may end up suspending with a wrong ordering of it.
               */
              device_reorder_to_tail(consumer, NULL);
      
              dev_dbg(consumer, "Linked as a consumer to %s\n", dev_name(supplier));
      
      out:
              device_pm_unlock();
              device_links_write_unlock();
      
              if ((flags & DL_FLAG_PM_RUNTIME && flags & DL_FLAG_RPM_ACTIVE) && !link)
                      pm_runtime_put(supplier);
      
              return link;
      }
      EXPORT_SYMBOL_GPL(device_link_add);
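
       /*
        * Usage sketch (illustrative; "consumer" and "supplier" are
        * hypothetical struct device pointers, typically wired up from a
        * consumer driver's probe path):
        *
        *        struct device_link *link;
        *
        *        link = device_link_add(consumer, supplier,
        *                               DL_FLAG_PM_RUNTIME |
        *                               DL_FLAG_AUTOREMOVE_CONSUMER);
        *        if (!link)
        *                return -EPROBE_DEFER;      // or treat as a hard error
        *
        * With DL_FLAG_AUTOREMOVE_CONSUMER the driver core drops the link when
        * the consumer driver unbinds, so no explicit device_link_del() call
        * is needed.
        */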
      
      #ifdef CONFIG_SRCU
      static void __device_link_del(struct kref *kref)
      {
              struct device_link *link = container_of(kref, struct device_link, kref);
      
              dev_dbg(link->consumer, "Dropping the link to %s\n",
                      dev_name(link->supplier));
      
              pm_runtime_drop_link(link);
      
              list_del_rcu(&link->s_node);
              list_del_rcu(&link->c_node);
              device_unregister(&link->link_dev);
      }
      #else /* !CONFIG_SRCU */
      static void __device_link_del(struct kref *kref)
      {
              struct device_link *link = container_of(kref, struct device_link, kref);
      
              dev_info(link->consumer, "Dropping the link to %s\n",
                       dev_name(link->supplier));
      
              pm_runtime_drop_link(link);
      
              list_del(&link->s_node);
              list_del(&link->c_node);
              device_unregister(&link->link_dev);
      }
      #endif /* !CONFIG_SRCU */
      
      static void device_link_put_kref(struct device_link *link)
      {
              if (link->flags & DL_FLAG_STATELESS)
                      kref_put(&link->kref, __device_link_del);
              else
                      WARN(1, "Unable to drop a managed device link reference\n");
      }
      
      /**
       * device_link_del - Delete a stateless link between two devices.
       * @link: Device link to delete.
       *
       * The caller must ensure proper synchronization of this function with runtime
       * PM.  If the link was added multiple times, it needs to be deleted as often.
       * Care is required for hotplugged devices:  Their links are purged on removal
       * and calling device_link_del() is then no longer allowed.
       */
      void device_link_del(struct device_link *link)
      {
              device_links_write_lock();
              device_link_put_kref(link);
              device_links_write_unlock();
      }
      EXPORT_SYMBOL_GPL(device_link_del);
      
      /**
       * device_link_remove - Delete a stateless link between two devices.
       * @consumer: Consumer end of the link.
       * @supplier: Supplier end of the link.
       *
       * The caller must ensure proper synchronization of this function with runtime
       * PM.
       */
      void device_link_remove(void *consumer, struct device *supplier)
      {
              struct device_link *link;
      
              if (WARN_ON(consumer == supplier))
                      return;
      
              device_links_write_lock();
      
              list_for_each_entry(link, &supplier->links.consumers, s_node) {
                      if (link->consumer == consumer) {
                              device_link_put_kref(link);
                              break;
                      }
              }
      
              device_links_write_unlock();
      }
      EXPORT_SYMBOL_GPL(device_link_remove);
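
       /*
        * Sketch of the stateless variant (illustrative): a caller that passed
        * DL_FLAG_STATELESS to device_link_add() owns the link and must drop
        * it itself, either by the returned pointer or by the endpoint pair:
        *
        *        link = device_link_add(consumer, supplier, DL_FLAG_STATELESS);
        *        ...
        *        device_link_del(link);
        *
        * or, when the pointer was not kept around:
        *
        *        device_link_remove(consumer, supplier);
        */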
      
      static void device_links_missing_supplier(struct device *dev)
      {
              struct device_link *link;
      
              list_for_each_entry(link, &dev->links.suppliers, c_node) {
                      if (link->status != DL_STATE_CONSUMER_PROBE)
                              continue;
      
                      if (link->supplier->links.status == DL_DEV_DRIVER_BOUND) {
                              WRITE_ONCE(link->status, DL_STATE_AVAILABLE);
                      } else {
                              WARN_ON(!(link->flags & DL_FLAG_SYNC_STATE_ONLY));
                              WRITE_ONCE(link->status, DL_STATE_DORMANT);
                      }
              }
      }
      
      /**
       * device_links_check_suppliers - Check presence of supplier drivers.
       * @dev: Consumer device.
       *
       * Check links from this device to any suppliers.  Walk the list of the device's
       * links to suppliers and see if all of them are available.  If not, simply
       * return -EPROBE_DEFER.
       *
       * We need to guarantee that the supplier will not go away after the check has
       * been positive here.  It only can go away in __device_release_driver() and
        * that function checks the device's links to consumers.  This means we need to
       * mark the link as "consumer probe in progress" to make the supplier removal
       * wait for us to complete (or bad things may happen).
       *
       * Links without the DL_FLAG_MANAGED flag set are ignored.
       */
      int device_links_check_suppliers(struct device *dev)
      {
              struct device_link *link;
              int ret = 0;
      
              /*
                * A device that is waiting for one of its suppliers to become
                * available is not allowed to probe.
               */
              mutex_lock(&fwnode_link_lock);
              if (dev->fwnode && !list_empty(&dev->fwnode->suppliers) &&
                  !fw_devlink_is_permissive()) {
                      mutex_unlock(&fwnode_link_lock);
                      return -EPROBE_DEFER;
              }
              mutex_unlock(&fwnode_link_lock);
      
              device_links_write_lock();
      
              list_for_each_entry(link, &dev->links.suppliers, c_node) {
                      if (!(link->flags & DL_FLAG_MANAGED))
                              continue;
      
                      if (link->status != DL_STATE_AVAILABLE &&
                          !(link->flags & DL_FLAG_SYNC_STATE_ONLY)) {
                              device_links_missing_supplier(dev);
                              ret = -EPROBE_DEFER;
                              break;
                      }
                      WRITE_ONCE(link->status, DL_STATE_CONSUMER_PROBE);
              }
              dev->links.status = DL_DEV_PROBING;
      
              device_links_write_unlock();
              return ret;
      }
      
      /**
       * __device_links_queue_sync_state - Queue a device for sync_state() callback
       * @dev: Device to call sync_state() on
       * @list: List head to queue the @dev on
       *
       * Queues a device for a sync_state() callback when the device links write lock
       * isn't held. This allows the sync_state() execution flow to use device links
       * APIs.  The caller must ensure this function is called with
       * device_links_write_lock() held.
       *
       * This function does a get_device() to make sure the device is not freed while
       * on this list.
       *
       * So the caller must also ensure that device_links_flush_sync_list() is called
       * as soon as the caller releases device_links_write_lock().  This is necessary
       * to make sure the sync_state() is called in a timely fashion and the
       * put_device() is called on this device.
       */
      static void __device_links_queue_sync_state(struct device *dev,
                                                  struct list_head *list)
      {
              struct device_link *link;
      
              if (!dev_has_sync_state(dev))
                      return;
              if (dev->state_synced)
                      return;
      
              list_for_each_entry(link, &dev->links.consumers, s_node) {
                      if (!(link->flags & DL_FLAG_MANAGED))
                              continue;
                      if (link->status != DL_STATE_ACTIVE)
                              return;
              }
      
              /*
               * Set the flag here to avoid adding the same device to a list more
               * than once. This can happen if new consumers get added to the device
               * and probed before the list is flushed.
               */
              dev->state_synced = true;
      
              if (WARN_ON(!list_empty(&dev->links.defer_sync)))
                      return;
      
              get_device(dev);
              list_add_tail(&dev->links.defer_sync, list);
      }
      
      /**
       * device_links_flush_sync_list - Call sync_state() on a list of devices
       * @list: List of devices to call sync_state() on
       * @dont_lock_dev: Device for which lock is already held by the caller
       *
       * Calls sync_state() on all the devices that have been queued for it. This
       * function is used in conjunction with __device_links_queue_sync_state(). The
       * @dont_lock_dev parameter is useful when this function is called from a
       * context where a device lock is already held.
       */
      static void device_links_flush_sync_list(struct list_head *list,
                                               struct device *dont_lock_dev)
      {
              struct device *dev, *tmp;
      
              list_for_each_entry_safe(dev, tmp, list, links.defer_sync) {
                      list_del_init(&dev->links.defer_sync);
      
                      if (dev != dont_lock_dev)
                              device_lock(dev);
      
                      if (dev->bus->sync_state)
                              dev->bus->sync_state(dev);
                      else if (dev->driver && dev->driver->sync_state)
                              dev->driver->sync_state(dev);
      
                      if (dev != dont_lock_dev)
                              device_unlock(dev);
      
                      put_device(dev);
              }
      }
      
      void device_links_supplier_sync_state_pause(void)
      {
              device_links_write_lock();
              defer_sync_state_count++;
              device_links_write_unlock();
      }
      
      void device_links_supplier_sync_state_resume(void)
      {
              struct device *dev, *tmp;
              LIST_HEAD(sync_list);
      
              device_links_write_lock();
              if (!defer_sync_state_count) {
                      WARN(true, "Unmatched sync_state pause/resume!");
                      goto out;
              }
              defer_sync_state_count--;
              if (defer_sync_state_count)
                      goto out;
      
              list_for_each_entry_safe(dev, tmp, &deferred_sync, links.defer_sync) {
                      /*
                       * Delete from deferred_sync list before queuing it to
                       * sync_list because defer_sync is used for both lists.
                       */
                      list_del_init(&dev->links.defer_sync);
                      __device_links_queue_sync_state(dev, &sync_list);
              }
      out:
              device_links_write_unlock();
      
              device_links_flush_sync_list(&sync_list, NULL);
      }
      
      static int sync_state_resume_initcall(void)
      {
              device_links_supplier_sync_state_resume();
              return 0;
      }
      late_initcall(sync_state_resume_initcall);
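
       /*
        * Sketch of how the pause/resume pair above is meant to be used
        * (illustrative; create_all_child_devices() is hypothetical): code
        * that registers a batch of devices brackets the batch so suppliers
        * do not receive sync_state() callbacks until all of their potential
        * consumers have been added:
        *
        *        device_links_supplier_sync_state_pause();
        *        create_all_child_devices();
        *        device_links_supplier_sync_state_resume();
        */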
      
      static void __device_links_supplier_defer_sync(struct device *sup)
      {
              if (list_empty(&sup->links.defer_sync) && dev_has_sync_state(sup))
                      list_add_tail(&sup->links.defer_sync, &deferred_sync);
      }
      
      static void device_link_drop_managed(struct device_link *link)
      {
              link->flags &= ~DL_FLAG_MANAGED;
              WRITE_ONCE(link->status, DL_STATE_NONE);
              kref_put(&link->kref, __device_link_del);
      }
      
      static ssize_t waiting_for_supplier_show(struct device *dev,
                                               struct device_attribute *attr,
                                               char *buf)
      {
              bool val;
      
              device_lock(dev);
              val = !list_empty(&dev->fwnode->suppliers);
              device_unlock(dev);
              return sysfs_emit(buf, "%u\n", val);
      }
      static DEVICE_ATTR_RO(waiting_for_supplier);
      
      /**
       * device_links_driver_bound - Update device links after probing its driver.
       * @dev: Device to update the links for.
       *
       * The probe has been successful, so update links from this device to any
       * consumers by changing their status to "available".
       *
       * Also change the status of @dev's links to suppliers to "active".
       *
       * Links without the DL_FLAG_MANAGED flag set are ignored.
       */
      void device_links_driver_bound(struct device *dev)
      {
              struct device_link *link, *ln;
              LIST_HEAD(sync_list);
      
              /*
                * If a device probes successfully, it's expected to have created all
                * the device links it needs to, or to create new device links as it
                * needs them. So, it no longer needs to wait on any suppliers.
               */
              if (dev->fwnode && dev->fwnode->dev == dev)
                      fwnode_links_purge_suppliers(dev->fwnode);
              device_remove_file(dev, &dev_attr_waiting_for_supplier);
      
              device_links_write_lock();
      
              list_for_each_entry(link, &dev->links.consumers, s_node) {
                      if (!(link->flags & DL_FLAG_MANAGED))
                              continue;
      
                      /*
                       * Links created during consumer probe may be in the "consumer
                       * probe" state to start with if the supplier is still probing
                       * when they are created and they may become "active" if the
                       * consumer probe returns first.  Skip them here.
                       */
                      if (link->status == DL_STATE_CONSUMER_PROBE ||
                          link->status == DL_STATE_ACTIVE)
                              continue;
      
                      WARN_ON(link->status != DL_STATE_DORMANT);
                      WRITE_ONCE(link->status, DL_STATE_AVAILABLE);
      
                      if (link->flags & DL_FLAG_AUTOPROBE_CONSUMER)
                              driver_deferred_probe_add(link->consumer);
              }
      
              if (defer_sync_state_count)
                      __device_links_supplier_defer_sync(dev);
              else
                      __device_links_queue_sync_state(dev, &sync_list);
      
              list_for_each_entry_safe(link, ln, &dev->links.suppliers, c_node) {
                      struct device *supplier;
      
                      if (!(link->flags & DL_FLAG_MANAGED))
                              continue;
      
                      supplier = link->supplier;
                      if (link->flags & DL_FLAG_SYNC_STATE_ONLY) {
                              /*
                               * When DL_FLAG_SYNC_STATE_ONLY is set, it means no
                               * other DL_MANAGED_LINK_FLAGS have been set. So, it's
                                * safe to drop the managed link completely.
                               */
                              device_link_drop_managed(link);
                      } else {
                              WARN_ON(link->status != DL_STATE_CONSUMER_PROBE);
                              WRITE_ONCE(link->status, DL_STATE_ACTIVE);
                      }
      
                      /*
                       * This needs to be done even for the deleted
                       * DL_FLAG_SYNC_STATE_ONLY device link in case it was the last
                       * device link that was preventing the supplier from getting a
                       * sync_state() call.
                       */
                      if (defer_sync_state_count)
                              __device_links_supplier_defer_sync(supplier);
                      else
                              __device_links_queue_sync_state(supplier, &sync_list);
              }
      
              dev->links.status = DL_DEV_DRIVER_BOUND;
      
              device_links_write_unlock();
      
              device_links_flush_sync_list(&sync_list, dev);
      }
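      
      /*
       * Illustrative sketch, not part of the original file: the transitions
       * handled above are normally set up by a consumer driver creating a
       * managed link to one of its suppliers.  "consumer_dev" and
       * "supplier_dev" are hypothetical devices used only for illustration:
       *
       *      struct device_link *link;
       *
       *      link = device_link_add(consumer_dev, supplier_dev,
       *                             DL_FLAG_AUTOREMOVE_CONSUMER |
       *                             DL_FLAG_PM_RUNTIME);
       *      if (!link)
       *              return -EINVAL;
       *
       * Once the supplier's driver binds, device_links_driver_bound() flips
       * such a link from "dormant" to "available" as described above.
       */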
      
      /**
       * __device_links_no_driver - Update links of a device without a driver.
       * @dev: Device without a driver.
       *
       * Delete all non-persistent links from this device to any suppliers.
       *
       * Persistent links stay around, but their status is changed to "available",
       * unless they already are in the "supplier unbind in progress" state in which
       * case they need not be updated.
       *
       * Links without the DL_FLAG_MANAGED flag set are ignored.
       */
      static void __device_links_no_driver(struct device *dev)
      {
              struct device_link *link, *ln;
      
              list_for_each_entry_safe_reverse(link, ln, &dev->links.suppliers, c_node) {
                      if (!(link->flags & DL_FLAG_MANAGED))
                              continue;
      
                      if (link->flags & DL_FLAG_AUTOREMOVE_CONSUMER) {
                              device_link_drop_managed(link);
                              continue;
                      }
      
                      if (link->status != DL_STATE_CONSUMER_PROBE &&
                          link->status != DL_STATE_ACTIVE)
                              continue;
      
                      if (link->supplier->links.status == DL_DEV_DRIVER_BOUND) {
                              WRITE_ONCE(link->status, DL_STATE_AVAILABLE);
                      } else {
                              WARN_ON(!(link->flags & DL_FLAG_SYNC_STATE_ONLY));
                              WRITE_ONCE(link->status, DL_STATE_DORMANT);
                      }
              }
      
              dev->links.status = DL_DEV_NO_DRIVER;
      }
      
      /**
       * device_links_no_driver - Update links after failing driver probe.
       * @dev: Device whose driver has just failed to probe.
       *
       * Clean up leftover links to consumers for @dev and invoke
       * %__device_links_no_driver() to update links to suppliers for it as
       * appropriate.
       *
       * Links without the DL_FLAG_MANAGED flag set are ignored.
       */
      void device_links_no_driver(struct device *dev)
      {
              struct device_link *link;
      
              device_links_write_lock();
      
              list_for_each_entry(link, &dev->links.consumers, s_node) {
                      if (!(link->flags & DL_FLAG_MANAGED))
                              continue;
      
                      /*
                       * The probe has failed, so if the status of the link is
                       * "consumer probe" or "active", it must have been added by
                       * a probing consumer while this device was still probing.
                       * Change its state to "dormant", as it represents a valid
                       * relationship, but it is not functionally meaningful.
                       */
                      if (link->status == DL_STATE_CONSUMER_PROBE ||
                          link->status == DL_STATE_ACTIVE)
                              WRITE_ONCE(link->status, DL_STATE_DORMANT);
              }
      
              __device_links_no_driver(dev);
      
              device_links_write_unlock();
      }
      
      /**
       * device_links_driver_cleanup - Update links after driver removal.
       * @dev: Device whose driver has just gone away.
       *
       * Update links to consumers for @dev by changing their status to "dormant" and
       * invoke %__device_links_no_driver() to update links to suppliers for it as
       * appropriate.
       *
       * Links without the DL_FLAG_MANAGED flag set are ignored.
       */
      void device_links_driver_cleanup(struct device *dev)
      {
              struct device_link *link, *ln;
      
              device_links_write_lock();
      
              list_for_each_entry_safe(link, ln, &dev->links.consumers, s_node) {
                      if (!(link->flags & DL_FLAG_MANAGED))
                              continue;
      
                      WARN_ON(link->flags & DL_FLAG_AUTOREMOVE_CONSUMER);
                      WARN_ON(link->status != DL_STATE_SUPPLIER_UNBIND);
      
                      /*
                       * autoremove the links between this @dev and its consumer
                       * devices that are not active, i.e. where the link state
                       * has moved to DL_STATE_SUPPLIER_UNBIND.
                       */
                      if (link->status == DL_STATE_SUPPLIER_UNBIND &&
                          link->flags & DL_FLAG_AUTOREMOVE_SUPPLIER)
                              device_link_drop_managed(link);
      
                      WRITE_ONCE(link->status, DL_STATE_DORMANT);
              }
      
              list_del_init(&dev->links.defer_sync);
              __device_links_no_driver(dev);
      
              device_links_write_unlock();
      }
      
      /**
       * device_links_busy - Check if there are any busy links to consumers.
       * @dev: Device to check.
       *
       * Check each consumer of the device and return 'true' if its link's status
       * is one of "consumer probe" or "active" (meaning that the given consumer is
       * probing right now or its driver is present).  Otherwise, change the link
       * state to "supplier unbind" to prevent the consumer from being probed
       * successfully going forward.
       *
       * Return 'false' if there are no probing or active consumers.
       *
       * Links without the DL_FLAG_MANAGED flag set are ignored.
       */
      bool device_links_busy(struct device *dev)
      {
              struct device_link *link;
              bool ret = false;
      
              device_links_write_lock();
      
              list_for_each_entry(link, &dev->links.consumers, s_node) {
                      if (!(link->flags & DL_FLAG_MANAGED))
                              continue;
      
                      if (link->status == DL_STATE_CONSUMER_PROBE
                          || link->status == DL_STATE_ACTIVE) {
                              ret = true;
                              break;
                      }
                      WRITE_ONCE(link->status, DL_STATE_SUPPLIER_UNBIND);
              }
      
              dev->links.status = DL_DEV_UNBINDING;
      
              device_links_write_unlock();
              return ret;
      }
      
      /**
       * device_links_unbind_consumers - Force unbind consumers of the given device.
       * @dev: Device to unbind the consumers of.
       *
       * Walk the list of links to consumers for @dev and if any of them is in the
       * "consumer probe" state, wait for all device probes in progress to complete
       * and start over.
       *
       * If that's not the case, change the status of the link to "supplier unbind"
       * and check if the link was in the "active" state.  If so, force the consumer
       * driver to unbind and start over (the consumer will not re-probe as we have
       * changed the state of the link already).
       *
       * Links without the DL_FLAG_MANAGED flag set are ignored.
       */
      void device_links_unbind_consumers(struct device *dev)
      {
              struct device_link *link;
      
       start:
              device_links_write_lock();
      
              list_for_each_entry(link, &dev->links.consumers, s_node) {
                      enum device_link_state status;
      
                      if (!(link->flags & DL_FLAG_MANAGED) ||
                          link->flags & DL_FLAG_SYNC_STATE_ONLY)
                              continue;
      
                      status = link->status;
                      if (status == DL_STATE_CONSUMER_PROBE) {
                              device_links_write_unlock();
      
                              wait_for_device_probe();
                              goto start;
                      }
                      WRITE_ONCE(link->status, DL_STATE_SUPPLIER_UNBIND);
                      if (status == DL_STATE_ACTIVE) {
                              struct device *consumer = link->consumer;
      
                              get_device(consumer);
      
                              device_links_write_unlock();
      
                              device_release_driver_internal(consumer, NULL,
                                                             consumer->parent);
                              put_device(consumer);
                              goto start;
                      }
              }
      
              device_links_write_unlock();
      }
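      
      /*
       * Illustrative sketch, not part of the original file: the driver core's
       * unbind path (in drivers/base/dd.c) is expected to pair the two helpers
       * above roughly as follows, re-checking that the device is still bound
       * to its driver once device_links_unbind_consumers() returns, since the
       * device links write lock is dropped and re-taken while consumers are
       * being unbound:
       *
       *      if (device_links_busy(dev)) {
       *              device_links_unbind_consumers(dev);
       *              ...
       *      }
       */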
      
      /**
       * device_links_purge - Delete existing links to other devices.
       * @dev: Target device.
       */
      static void device_links_purge(struct device *dev)
      {
              struct device_link *link, *ln;
      
              if (dev->class == &devlink_class)
                      return;
      
              /*
               * Delete all of the remaining links from this device to any other
               * devices (either consumers or suppliers).
               */
              device_links_write_lock();
      
              list_for_each_entry_safe_reverse(link, ln, &dev->links.suppliers, c_node) {
                      WARN_ON(link->status == DL_STATE_ACTIVE);
                      __device_link_del(&link->kref);
              }
      
              list_for_each_entry_safe_reverse(link, ln, &dev->links.consumers, s_node) {
                      WARN_ON(link->status != DL_STATE_DORMANT &&
                              link->status != DL_STATE_NONE);
                      __device_link_del(&link->kref);
              }
      
              device_links_write_unlock();
      }
      
      static u32 fw_devlink_flags = DL_FLAG_SYNC_STATE_ONLY;
      static int __init fw_devlink_setup(char *arg)
      {
              if (!arg)
                      return -EINVAL;
      
              if (strcmp(arg, "off") == 0) {
                      fw_devlink_flags = 0;
              } else if (strcmp(arg, "permissive") == 0) {
                      fw_devlink_flags = DL_FLAG_SYNC_STATE_ONLY;
              } else if (strcmp(arg, "on") == 0) {
                      fw_devlink_flags = DL_FLAG_AUTOPROBE_CONSUMER;
              } else if (strcmp(arg, "rpm") == 0) {
                      fw_devlink_flags = DL_FLAG_AUTOPROBE_CONSUMER |
                                         DL_FLAG_PM_RUNTIME;
              }
              return 0;
      }
      early_param("fw_devlink", fw_devlink_setup);
      
      u32 fw_devlink_get_flags(void)
      {
              return fw_devlink_flags;
      }
      
      static bool fw_devlink_is_permissive(void)
      {
              return fw_devlink_flags == DL_FLAG_SYNC_STATE_ONLY;
      }
      
      static void fw_devlink_parse_fwnode(struct fwnode_handle *fwnode)
      {
              if (fwnode->flags & FWNODE_FLAG_LINKS_ADDED)
                      return;
      
              fwnode_call_int_op(fwnode, add_links);
              fwnode->flags |= FWNODE_FLAG_LINKS_ADDED;
      }
      
      static void fw_devlink_parse_fwtree(struct fwnode_handle *fwnode)
      {
              struct fwnode_handle *child = NULL;
      
              fw_devlink_parse_fwnode(fwnode);
      
              while ((child = fwnode_get_next_available_child_node(fwnode, child)))
                      fw_devlink_parse_fwtree(child);
      }
      
      /**
       * fw_devlink_create_devlink - Create a device link from a consumer to fwnode
       * @con: Consumer device for the device link
       * @sup_handle: fwnode handle of supplier
       * @flags: device link flags to pass to device_link_add()
       *
       * This function will try to create a device link between the consumer device
       * @con and the supplier device represented by @sup_handle.
       *
       * The supplier has to be provided as a fwnode because incorrect cycles in
       * fwnode links can sometimes cause the supplier device to never be created.
       * This function detects such cases and returns an error if it cannot create a
       * device link from the consumer to a missing supplier.
       *
       * Returns,
       * 0 on successfully creating a device link
       * -EINVAL if the device link cannot be created as expected
       * -EAGAIN if the device link cannot be created right now, but it may be
       *  possible to do that in the future
       */
      static int fw_devlink_create_devlink(struct device *con,
                                           struct fwnode_handle *sup_handle, u32 flags)
      {
              struct device *sup_dev;
              int ret = 0;
      
              sup_dev = get_dev_from_fwnode(sup_handle);
              if (sup_dev) {
                      /*
                       * If this fails, it is due to cycles in device links.  Just
                       * give up on this link and treat it as invalid.
                       */
                      if (!device_link_add(con, sup_dev, flags))
                              ret = -EINVAL;
      
                      goto out;
              }
      
              /*
               * DL_FLAG_SYNC_STATE_ONLY doesn't block probing and supports
               * cycles. So cycle detection isn't necessary and shouldn't be
               * done.
               */
              if (flags & DL_FLAG_SYNC_STATE_ONLY)
                      return -EAGAIN;
      
              /*
               * If we can't find the supplier device from its fwnode, it might be
               * due to a cyclic dependency between fwnodes. Some of these cycles can
               * be broken by applying logic. Check for these types of cycles and
               * break them so that devices in the cycle probe properly.
               *
               * If the supplier's parent is dependent on the consumer, then
               * the consumer-supplier dependency is a false dependency. So,
               * treat it as an invalid link.
               */
              sup_dev = fwnode_get_next_parent_dev(sup_handle);
              if (sup_dev && device_is_dependent(con, sup_dev)) {
                      dev_dbg(con, "Not linking to %pfwP - False link\n",
                              sup_handle);
                      ret = -EINVAL;
              } else {
                      /*
                       * Can't check for cycles or no cycles. So let's try
                       * again later.
                       */
                      ret = -EAGAIN;
              }
      
      out:
              put_device(sup_dev);
              return ret;
      }
      
      /**
       * __fw_devlink_link_to_consumers - Create device links to consumers of a device
       * @dev: Device that needs to be linked to its consumers
       *
       * This function looks at all the consumer fwnodes of @dev and creates device
       * links between the consumer device and @dev (supplier).
       *
       * If the consumer device has not been added yet, then this function creates a
       * SYNC_STATE_ONLY link between @dev (supplier) and the closest ancestor device
       * of the consumer fwnode. This is necessary to make sure @dev doesn't get a
       * sync_state() callback before the real consumer device gets to be added and
       * then probed.
       *
       * Once device links are created from the real consumer to @dev (supplier), the
       * fwnode links are deleted.
       */
      static void __fw_devlink_link_to_consumers(struct device *dev)
      {
              struct fwnode_handle *fwnode = dev->fwnode;
              struct fwnode_link *link, *tmp;
      
              list_for_each_entry_safe(link, tmp, &fwnode->consumers, s_hook) {
                      u32 dl_flags = fw_devlink_get_flags();
                      struct device *con_dev;
                      bool own_link = true;
                      int ret;
      
                      con_dev = get_dev_from_fwnode(link->consumer);
                      /*
                       * If consumer device is not available yet, make a "proxy"
                       * SYNC_STATE_ONLY link from the consumer's parent device to
                       * the supplier device. This is necessary to make sure the
                       * supplier doesn't get a sync_state() callback before the real
                       * consumer can create a device link to the supplier.
                       *
                       * This proxy link step is needed to handle the case where the
                       * consumer's parent device is added before the supplier.
                       */
                      if (!con_dev) {
                              con_dev = fwnode_get_next_parent_dev(link->consumer);
                              /*
                               * However, if the consumer's parent device is also the
                               * parent of the supplier, don't create a
                               * consumer-supplier link from the parent to its child
                               * device. Such a dependency is impossible.
                               */
                              if (con_dev &&
                                  fwnode_is_ancestor_of(con_dev->fwnode, fwnode)) {
                                      put_device(con_dev);
                                      con_dev = NULL;
                              } else {
                                      own_link = false;
                                      dl_flags = DL_FLAG_SYNC_STATE_ONLY;
                              }
                      }
      
                      if (!con_dev)
                              continue;
      
                      ret = fw_devlink_create_devlink(con_dev, fwnode, dl_flags);
                      put_device(con_dev);
                      if (!own_link || ret == -EAGAIN)
                              continue;
      
                      list_del(&link->s_hook);
                      list_del(&link->c_hook);
                      kfree(link);
              }
      }
      
      /**
       * __fw_devlink_link_to_suppliers - Create device links to suppliers of a device
       * @dev: The consumer device that needs to be linked to its suppliers
       * @fwnode: Root of the fwnode tree that is used to create device links
       *
       * This function looks at all the supplier fwnodes of fwnode tree rooted at
       * @fwnode and creates device links between @dev (consumer) and all the
       * supplier devices of the entire fwnode tree at @fwnode.
       *
       * The function creates normal (non-SYNC_STATE_ONLY) device links between @dev
       * and the real suppliers of @dev. Once these device links are created, the
       * fwnode links are deleted. When such device links are successfully created,
       * this function is called recursively on those supplier devices. This is
       * needed to detect and break some invalid cycles in fwnode links.  See
       * fw_devlink_create_devlink() for more details.
       *
       * In addition, it also looks at all the suppliers of the entire fwnode tree
       * because some of the child devices of @dev that have not been added yet
       * (because @dev hasn't probed) might already have their suppliers added to
       * driver core. So, this function creates SYNC_STATE_ONLY device links between
       * @dev (consumer) and these suppliers to make sure they don't execute their
       * sync_state() callbacks before these child devices have a chance to create
       * their device links. The fwnode links that correspond to the child devices
       * aren't deleted because they are needed later to create the device links
       * between the real consumer and supplier devices.
       */
      static void __fw_devlink_link_to_suppliers(struct device *dev,
                                                 struct fwnode_handle *fwnode)
      {
              bool own_link = (dev->fwnode == fwnode);
              struct fwnode_link *link, *tmp;
              struct fwnode_handle *child = NULL;
              u32 dl_flags;
      
              if (own_link)
                      dl_flags = fw_devlink_get_flags();
              else
                      dl_flags = DL_FLAG_SYNC_STATE_ONLY;
      
              list_for_each_entry_safe(link, tmp, &fwnode->suppliers, c_hook) {
                      int ret;
                      struct device *sup_dev;
                      struct fwnode_handle *sup = link->supplier;
      
                      ret = fw_devlink_create_devlink(dev, sup, dl_flags);
                      if (!own_link || ret == -EAGAIN)
                              continue;
      
                      list_del(&link->s_hook);
                      list_del(&link->c_hook);
                      kfree(link);
      
                      /* If no device link was created, nothing more to do. */
                      if (ret)
                              continue;
      
                      /*
                       * If a device link was successfully created to a supplier, we
                       * now need to try and link the supplier to all its suppliers.
                       *
                       * This is needed to detect and delete false dependencies in
                       * fwnode links that haven't been converted to a device link
                       * yet. See comments in fw_devlink_create_devlink() for more
                       * details on the false dependency.
                       *
                       * Without deleting these false dependencies, some devices will
                       * never probe because they'll keep waiting for their false
                       * dependency fwnode links to be converted to device links.
                       */
                      sup_dev = get_dev_from_fwnode(sup);
                      __fw_devlink_link_to_suppliers(sup_dev, sup_dev->fwnode);
                      put_device(sup_dev);
              }
      
              /*
               * Make "proxy" SYNC_STATE_ONLY device links to represent the needs of
               * all the descendants. This proxy link step is needed to handle the
               * case where the supplier is added before the consumer's parent device
               * (@dev).
               */
              while ((child = fwnode_get_next_available_child_node(fwnode, child)))
                      __fw_devlink_link_to_suppliers(dev, child);
      }
      
      static void fw_devlink_link_device(struct device *dev)
      {
              struct fwnode_handle *fwnode = dev->fwnode;
      
              if (!fw_devlink_flags)
                      return;
      
              fw_devlink_parse_fwtree(fwnode);
      
              mutex_lock(&fwnode_link_lock);
              __fw_devlink_link_to_consumers(dev);
              __fw_devlink_link_to_suppliers(dev, fwnode);
              mutex_unlock(&fwnode_link_lock);
      }
      
      /* Device links support end. */
      
      int (*platform_notify)(struct device *dev) = NULL;
      int (*platform_notify_remove)(struct device *dev) = NULL;
      static struct kobject *dev_kobj;
      struct kobject *sysfs_dev_char_kobj;
      struct kobject *sysfs_dev_block_kobj;
      
      static DEFINE_MUTEX(device_hotplug_lock);
      
      void lock_device_hotplug(void)
      {
              mutex_lock(&device_hotplug_lock);
      }
      
      void unlock_device_hotplug(void)
      {
              mutex_unlock(&device_hotplug_lock);
      }
      
      int lock_device_hotplug_sysfs(void)
      {
              if (mutex_trylock(&device_hotplug_lock))
                      return 0;
      
              /* Avoid busy looping (5 ms of sleep should do). */
              msleep(5);
              return restart_syscall();
      }
      
      #ifdef CONFIG_BLOCK
      static inline int device_is_not_partition(struct device *dev)
      {
              return !(dev->type == &part_type);
      }
      #else
      static inline int device_is_not_partition(struct device *dev)
      {
              return 1;
      }
      #endif
      
      static int
      device_platform_notify(struct device *dev, enum kobject_action action)
      {
              int ret;
      
              ret = acpi_platform_notify(dev, action);
              if (ret)
                      return ret;
      
              ret = software_node_notify(dev, action);
              if (ret)
                      return ret;
      
              if (platform_notify && action == KOBJ_ADD)
                      platform_notify(dev);
              else if (platform_notify_remove && action == KOBJ_REMOVE)
                      platform_notify_remove(dev);
              return 0;
      }
      
      /**
       * dev_driver_string - Return a device's driver name, if at all possible
       * @dev: struct device to get the name of
       *
       * Will return the device's driver's name if it is bound to a device.  If
       * the device is not bound to a driver, it will return the name of the bus
       * it is attached to.  If it is not attached to a bus either, an empty
       * string will be returned.
       */
      const char *dev_driver_string(const struct device *dev)
      {
              struct device_driver *drv;
      
              /* dev->driver can change to NULL underneath us because of unbinding,
               * so be careful about accessing it.  dev->bus and dev->class should
               * never change once they are set, so they don't need special care.
               */
              drv = READ_ONCE(dev->driver);
              return drv ? drv->name :
                              (dev->bus ? dev->bus->name :
                              (dev->class ? dev->class->name : ""));
      }
      EXPORT_SYMBOL(dev_driver_string);
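      
      /*
       * Illustrative sketch, not part of the original file: dev_driver_string()
       * is convenient in diagnostics where a device may or may not be bound to
       * a driver, e.g.:
       *
       *      pr_info("%s %s: resumed\n", dev_driver_string(dev), dev_name(dev));
       */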
      
      #define to_dev_attr(_attr) container_of(_attr, struct device_attribute, attr)
      
      static ssize_t dev_attr_show(struct kobject *kobj, struct attribute *attr,
                                   char *buf)
      {
              struct device_attribute *dev_attr = to_dev_attr(attr);
              struct device *dev = kobj_to_dev(kobj);
              ssize_t ret = -EIO;
      
              if (dev_attr->show)
                      ret = dev_attr->show(dev, dev_attr, buf);
              if (ret >= (ssize_t)PAGE_SIZE) {
                      printk("dev_attr_show: %pS returned bad count\n",
                                      dev_attr->show);
              }
              return ret;
      }
      
      static ssize_t dev_attr_store(struct kobject *kobj, struct attribute *attr,
                                    const char *buf, size_t count)
      {
              struct device_attribute *dev_attr = to_dev_attr(attr);
              struct device *dev = kobj_to_dev(kobj);
              ssize_t ret = -EIO;
      
              if (dev_attr->store)
                      ret = dev_attr->store(dev, dev_attr, buf, count);
              return ret;
      }
      
      static const struct sysfs_ops dev_sysfs_ops = {
              .show        = dev_attr_show,
              .store        = dev_attr_store,
      };
      
      #define to_ext_attr(x) container_of(x, struct dev_ext_attribute, attr)
      
      ssize_t device_store_ulong(struct device *dev,
                                 struct device_attribute *attr,
                                 const char *buf, size_t size)
      {
              struct dev_ext_attribute *ea = to_ext_attr(attr);
              int ret;
              unsigned long new;
      
              ret = kstrtoul(buf, 0, &new);
              if (ret)
                      return ret;
              *(unsigned long *)(ea->var) = new;
              /* Always return full write size even if we didn't consume all */
              return size;
      }
      EXPORT_SYMBOL_GPL(device_store_ulong);
      
      ssize_t device_show_ulong(struct device *dev,
                                struct device_attribute *attr,
                                char *buf)
      {
              struct dev_ext_attribute *ea = to_ext_attr(attr);
              return sysfs_emit(buf, "%lx\n", *(unsigned long *)(ea->var));
      }
      EXPORT_SYMBOL_GPL(device_show_ulong);
      
      ssize_t device_store_int(struct device *dev,
                               struct device_attribute *attr,
                               const char *buf, size_t size)
      {
              struct dev_ext_attribute *ea = to_ext_attr(attr);
              int ret;
              long new;
      
              ret = kstrtol(buf, 0, &new);
              if (ret)
                      return ret;
      
              if (new > INT_MAX || new < INT_MIN)
                      return -EINVAL;
              *(int *)(ea->var) = new;
              /* Always return full write size even if we didn't consume all */
              return size;
      }
      EXPORT_SYMBOL_GPL(device_store_int);
      
      ssize_t device_show_int(struct device *dev,
                              struct device_attribute *attr,
                              char *buf)
      {
              struct dev_ext_attribute *ea = to_ext_attr(attr);
      
              return sysfs_emit(buf, "%d\n", *(int *)(ea->var));
      }
      EXPORT_SYMBOL_GPL(device_show_int);
      
      ssize_t device_store_bool(struct device *dev, struct device_attribute *attr,
                                const char *buf, size_t size)
      {
              struct dev_ext_attribute *ea = to_ext_attr(attr);
      
              if (strtobool(buf, ea->var) < 0)
                      return -EINVAL;
      
              return size;
      }
      EXPORT_SYMBOL_GPL(device_store_bool);
      
      ssize_t device_show_bool(struct device *dev, struct device_attribute *attr,
                               char *buf)
      {
              struct dev_ext_attribute *ea = to_ext_attr(attr);
      
              return sysfs_emit(buf, "%d\n", *(bool *)(ea->var));
      }
      EXPORT_SYMBOL_GPL(device_show_bool);
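      
      /*
       * Illustrative sketch, not part of the original file: the show/store
       * helpers above are normally used through the DEVICE_ULONG_ATTR(),
       * DEVICE_INT_ATTR() and DEVICE_BOOL_ATTR() wrappers from
       * <linux/device.h>, which pair them with a variable.  "foo_threshold"
       * is a hypothetical attribute used only for illustration:
       *
       *      static unsigned long foo_threshold;
       *      static DEVICE_ULONG_ATTR(foo_threshold, 0644, foo_threshold);
       *
       * and then, for instance in a probe() routine:
       *
       *      ret = device_create_file(dev, &dev_attr_foo_threshold.attr);
       */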
      
      /**
       * device_release - free device structure.
       * @kobj: device's kobject.
       *
       * This is called once the reference count for the object
       * reaches 0. We forward the call to the device's release
       * method, which should handle actually freeing the structure.
       */
      static void device_release(struct kobject *kobj)
      {
              struct device *dev = kobj_to_dev(kobj);
              struct device_private *p = dev->p;
      
              /*
               * Some platform devices are driven without driver attached
               * and managed resources may have been acquired.  Make sure
               * all resources are released.
               *
               * Drivers still can add resources into device after device
               * is deleted but alive, so release devres here to avoid
               * possible memory leak.
               */
              devres_release_all(dev);
      
              kfree(dev->dma_range_map);
      
              if (dev->release)
                      dev->release(dev);
              else if (dev->type && dev->type->release)
                      dev->type->release(dev);
              else if (dev->class && dev->class->dev_release)
                      dev->class->dev_release(dev);
              else
                      WARN(1, KERN_ERR "Device '%s' does not have a release() function, it is broken and must be fixed. See Documentation/core-api/kobject.rst.\n",
                              dev_name(dev));
              kfree(p);
      }
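      
      /*
       * Illustrative sketch, not part of the original file: the WARN() above
       * fires when a device has no release callback at all.  A dynamically
       * allocated device is therefore expected to provide one, roughly like
       * this ("struct foo" is a hypothetical driver structure embedding a
       * struct device):
       *
       *      static void foo_release(struct device *dev)
       *      {
       *              struct foo *foo = container_of(dev, struct foo, dev);
       *
       *              kfree(foo);
       *      }
       *
       *      foo->dev.release = foo_release;
       */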
      
      static const void *device_namespace(struct kobject *kobj)
      {
              struct device *dev = kobj_to_dev(kobj);
              const void *ns = NULL;
      
              if (dev->class && dev->class->ns_type)
                      ns = dev->class->namespace(dev);
      
              return ns;
      }
      
      static void device_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid)
      {
              struct device *dev = kobj_to_dev(kobj);
      
              if (dev->class && dev->class->get_ownership)
                      dev->class->get_ownership(dev, uid, gid);
      }
      
      static struct kobj_type device_ktype = {
              .release        = device_release,
              .sysfs_ops        = &dev_sysfs_ops,
              .namespace        = device_namespace,
              .get_ownership        = device_get_ownership,
      };
      
      
      static int dev_uevent_filter(struct kset *kset, struct kobject *kobj)
      {
              struct kobj_type *ktype = get_ktype(kobj);
      
              if (ktype == &device_ktype) {
                      struct device *dev = kobj_to_dev(kobj);
                      if (dev->bus)
                              return 1;
                      if (dev->class)
                              return 1;
              }
              return 0;
      }
      
      static const char *dev_uevent_name(struct kset *kset, struct kobject *kobj)
      {
              struct device *dev = kobj_to_dev(kobj);
      
              if (dev->bus)
                      return dev->bus->name;
              if (dev->class)
                      return dev->class->name;
              return NULL;
      }
      
      static int dev_uevent(struct kset *kset, struct kobject *kobj,
                            struct kobj_uevent_env *env)
      {
              struct device *dev = kobj_to_dev(kobj);
              int retval = 0;
      
              /* add device node properties if present */
              if (MAJOR(dev->devt)) {
                      const char *tmp;
                      const char *name;
                      umode_t mode = 0;
                      kuid_t uid = GLOBAL_ROOT_UID;
                      kgid_t gid = GLOBAL_ROOT_GID;
      
                      add_uevent_var(env, "MAJOR=%u", MAJOR(dev->devt));
                      add_uevent_var(env, "MINOR=%u", MINOR(dev->devt));
                      name = device_get_devnode(dev, &mode, &uid, &gid, &tmp);
                      if (name) {
                              add_uevent_var(env, "DEVNAME=%s", name);
                              if (mode)
                                      add_uevent_var(env, "DEVMODE=%#o", mode & 0777);
                              if (!uid_eq(uid, GLOBAL_ROOT_UID))
                                      add_uevent_var(env, "DEVUID=%u", from_kuid(&init_user_ns, uid));
                              if (!gid_eq(gid, GLOBAL_ROOT_GID))
                                      add_uevent_var(env, "DEVGID=%u", from_kgid(&init_user_ns, gid));
                              kfree(tmp);
                      }
              }
      
              if (dev->type && dev->type->name)
                      add_uevent_var(env, "DEVTYPE=%s", dev->type->name);
      
              if (dev->driver)
                      add_uevent_var(env, "DRIVER=%s", dev->driver->name);
      
              /* Add common DT information about the device */
              of_device_uevent(dev, env);
      
              /* have the bus specific function add its stuff */
              if (dev->bus && dev->bus->uevent) {
                      retval = dev->bus->uevent(dev, env);
                      if (retval)
                              pr_debug("device: '%s': %s: bus uevent() returned %d\n",
                                       dev_name(dev), __func__, retval);
              }
      
              /* have the class specific function add its stuff */
              if (dev->class && dev->class->dev_uevent) {
                      retval = dev->class->dev_uevent(dev, env);
                      if (retval)
                              pr_debug("device: '%s': %s: class uevent() "
                                       "returned %d\n", dev_name(dev),
                                       __func__, retval);
              }
      
              /* have the device type specific function add its stuff */
              if (dev->type && dev->type->uevent) {
                      retval = dev->type->uevent(dev, env);
                      if (retval)
                              pr_debug("device: '%s': %s: dev_type uevent() "
                                       "returned %d\n", dev_name(dev),
                                       __func__, retval);
              }
      
              return retval;
      }
      
      static const struct kset_uevent_ops device_uevent_ops = {
              .filter =        dev_uevent_filter,
              .name =                dev_uevent_name,
              .uevent =        dev_uevent,
      };
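      
      /*
       * Illustrative sketch, not part of the original file: a class can add its
       * own variables to the uevent environment via the dev_uevent() hook
       * called from dev_uevent() above.  "foo_class" and FOO_MODE are
       * hypothetical:
       *
       *      static int foo_dev_uevent(struct device *dev,
       *                                struct kobj_uevent_env *env)
       *      {
       *              return add_uevent_var(env, "FOO_MODE=%d", 1);
       *      }
       *
       *      static struct class foo_class = {
       *              .name           = "foo",
       *              .dev_uevent     = foo_dev_uevent,
       *      };
       */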
      
      static ssize_t uevent_show(struct device *dev, struct device_attribute *attr,
                                 char *buf)
      {
              struct kobject *top_kobj;
              struct kset *kset;
              struct kobj_uevent_env *env = NULL;
              int i;
              int len = 0;
              int retval;
      
              /* search the kset the device belongs to */
              top_kobj = &dev->kobj;
              while (!top_kobj->kset && top_kobj->parent)
                      top_kobj = top_kobj->parent;
              if (!top_kobj->kset)
                      goto out;
      
              kset = top_kobj->kset;
              if (!kset->uevent_ops || !kset->uevent_ops->uevent)
                      goto out;
      
              /* respect filter */
              if (kset->uevent_ops && kset->uevent_ops->filter)
                      if (!kset->uevent_ops->filter(kset, &dev->kobj))
                              goto out;
      
              env = kzalloc(sizeof(struct kobj_uevent_env), GFP_KERNEL);
              if (!env)
                      return -ENOMEM;
      
              /* let the kset specific function add its keys */
              retval = kset->uevent_ops->uevent(kset, &dev->kobj, env);
              if (retval)
                      goto out;
      
              /* copy keys to file */
              for (i = 0; i < env->envp_idx; i++)
                      len += sysfs_emit_at(buf, len, "%s\n", env->envp[i]);
      out:
              kfree(env);
              return len;
      }
      
      static ssize_t uevent_store(struct device *dev, struct device_attribute *attr,
                                  const char *buf, size_t count)
      {
              int rc;
      
              rc = kobject_synth_uevent(&dev->kobj, buf, count);
      
              if (rc) {
                      dev_err(dev, "uevent: failed to send synthetic uevent\n");
                      return rc;
              }
      
              return count;
      }
      static DEVICE_ATTR_RW(uevent);
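      
      /*
       * Illustrative usage, not part of the original file: the "uevent"
       * attribute defined above lets user space read back the event
       * environment and synthesize events, e.g.:
       *
       *      cat /sys/devices/.../uevent
       *      echo change > /sys/devices/.../uevent
       */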
      
      static ssize_t online_show(struct device *dev, struct device_attribute *attr,
                                 char *buf)
      {
              bool val;
      
              device_lock(dev);
              val = !dev->offline;
              device_unlock(dev);
              return sysfs_emit(buf, "%u\n", val);
      }
      
      static ssize_t online_store(struct device *dev, struct device_attribute *attr,
                                  const char *buf, size_t count)
      {
              bool val;
              int ret;
      
              ret = strtobool(buf, &val);
              if (ret < 0)
                      return ret;
      
              ret = lock_device_hotplug_sysfs();
              if (ret)
                      return ret;
      
              ret = val ? device_online(dev) : device_offline(dev);
              unlock_device_hotplug();
              return ret < 0 ? ret : count;
      }
      static DEVICE_ATTR_RW(online);
      
      int device_add_groups(struct device *dev, const struct attribute_group **groups)
      {
              return sysfs_create_groups(&dev->kobj, groups);
      }
      EXPORT_SYMBOL_GPL(device_add_groups);
      
      void device_remove_groups(struct device *dev,
                                const struct attribute_group **groups)
      {
              sysfs_remove_groups(&dev->kobj, groups);
      }
      EXPORT_SYMBOL_GPL(device_remove_groups);
      
      union device_attr_group_devres {
              const struct attribute_group *group;
              const struct attribute_group **groups;
      };
      
      static int devm_attr_group_match(struct device *dev, void *res, void *data)
      {
              return ((union device_attr_group_devres *)res)->group == data;
      }
      
      static void devm_attr_group_remove(struct device *dev, void *res)
      {
              union device_attr_group_devres *devres = res;
              const struct attribute_group *group = devres->group;
      
              dev_dbg(dev, "%s: removing group %p\n", __func__, group);
              sysfs_remove_group(&dev->kobj, group);
      }
      
      static void devm_attr_groups_remove(struct device *dev, void *res)
      {
              union device_attr_group_devres *devres = res;
              const struct attribute_group **groups = devres->groups;
      
              dev_dbg(dev, "%s: removing groups %p\n", __func__, groups);
              sysfs_remove_groups(&dev->kobj, groups);
      }
      
      /**
       * devm_device_add_group - given a device, create a managed attribute group
       * @dev:        The device to create the group for
       * @grp:        The attribute group to create
       *
       * This function creates a group for the first time.  It will explicitly
       * warn and error if any of the attribute files being created already exist.
       *
       * Returns 0 on success or error code on failure.
       */
      int devm_device_add_group(struct device *dev, const struct attribute_group *grp)
      {
              union device_attr_group_devres *devres;
              int error;
      
              devres = devres_alloc(devm_attr_group_remove,
                                    sizeof(*devres), GFP_KERNEL);
              if (!devres)
                      return -ENOMEM;
      
              error = sysfs_create_group(&dev->kobj, grp);
              if (error) {
                      devres_free(devres);
                      return error;
              }
      
              devres->group = grp;
              devres_add(dev, devres);
              return 0;
      }
      EXPORT_SYMBOL_GPL(devm_device_add_group);
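      
      /*
       * Illustrative sketch, not part of the original file: a driver would
       * typically wrap its attributes in a group and register it from probe(),
       * letting devres remove it automatically on unbind.  The "foo" names are
       * hypothetical:
       *
       *      static ssize_t foo_state_show(struct device *dev,
       *                                    struct device_attribute *attr,
       *                                    char *buf)
       *      {
       *              return sysfs_emit(buf, "%d\n", 1);
       *      }
       *      static DEVICE_ATTR_RO(foo_state);
       *
       *      static struct attribute *foo_attrs[] = {
       *              &dev_attr_foo_state.attr,
       *              NULL
       *      };
       *
       *      static const struct attribute_group foo_group = {
       *              .attrs = foo_attrs,
       *      };
       *
       *      ret = devm_device_add_group(dev, &foo_group);
       */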
      
      /**
       * devm_device_remove_group: remove a managed group from a device
       * @dev:        device to remove the group from
       * @grp:        group to remove
       *
       * This function removes a group of attributes from a device. The group
       * must have been created for this device beforehand; otherwise the
       * removal will fail.
       */
      void devm_device_remove_group(struct device *dev,
                                    const struct attribute_group *grp)
      {
              WARN_ON(devres_release(dev, devm_attr_group_remove,
                                     devm_attr_group_match,
                                     /* cast away const */ (void *)grp));
      }
      EXPORT_SYMBOL_GPL(devm_device_remove_group);
      
      /**
       * devm_device_add_groups - create a bunch of managed attribute groups
       * @dev:        The device to create the group for
       * @groups:        The attribute groups to create, NULL terminated
       *
       * This function creates a bunch of managed attribute groups.  If an error
       * occurs when creating a group, all previously created groups will be
       * removed, unwinding everything back to the original state when this
       * function was called.  It will explicitly warn and error if any of the
       * attribute files being created already exist.
       *
       * Returns 0 on success or error code from sysfs_create_group on failure.
       */
      int devm_device_add_groups(struct device *dev,
                                 const struct attribute_group **groups)
      {
              union device_attr_group_devres *devres;
              int error;
      
              devres = devres_alloc(devm_attr_groups_remove,
                                    sizeof(*devres), GFP_KERNEL);
              if (!devres)
                      return -ENOMEM;
      
              error = sysfs_create_groups(&dev->kobj, groups);
              if (error) {
                      devres_free(devres);
                      return error;
              }
      
              devres->groups = groups;
              devres_add(dev, devres);
              return 0;
      }
      EXPORT_SYMBOL_GPL(devm_device_add_groups);
      
      /**
       * devm_device_remove_groups - remove a list of managed groups
       *
       * @dev:        The device for the groups to be removed from
       * @groups:        NULL terminated list of groups to be removed
       *
       * If groups is not NULL, remove the specified groups from the device.
       */
      void devm_device_remove_groups(struct device *dev,
                                     const struct attribute_group **groups)
      {
              WARN_ON(devres_release(dev, devm_attr_groups_remove,
                                     devm_attr_group_match,
                                     /* cast away const */ (void *)groups));
      }
      EXPORT_SYMBOL_GPL(devm_device_remove_groups);
      
      static int device_add_attrs(struct device *dev)
      {
              struct class *class = dev->class;
              const struct device_type *type = dev->type;
              int error;
      
              if (class) {
                      error = device_add_groups(dev, class->dev_groups);
                      if (error)
                              return error;
              }
      
              if (type) {
                      error = device_add_groups(dev, type->groups);
                      if (error)
                              goto err_remove_class_groups;
              }
      
              error = device_add_groups(dev, dev->groups);
              if (error)
                      goto err_remove_type_groups;
      
              if (device_supports_offline(dev) && !dev->offline_disabled) {
                      error = device_create_file(dev, &dev_attr_online);
                      if (error)
                              goto err_remove_dev_groups;
              }
      
              if (fw_devlink_flags && !fw_devlink_is_permissive() && dev->fwnode) {
                      error = device_create_file(dev, &dev_attr_waiting_for_supplier);
                      if (error)
                              goto err_remove_dev_online;
              }
      
              return 0;
      
       err_remove_dev_online:
              device_remove_file(dev, &dev_attr_online);
       err_remove_dev_groups:
              device_remove_groups(dev, dev->groups);
       err_remove_type_groups:
              if (type)
                      device_remove_groups(dev, type->groups);
       err_remove_class_groups:
              if (class)
                      device_remove_groups(dev, class->dev_groups);
      
              return error;
      }
      
      static void device_remove_attrs(struct device *dev)
      {
              struct class *class = dev->class;
              const struct device_type *type = dev->type;
      
              device_remove_file(dev, &dev_attr_waiting_for_supplier);
              device_remove_file(dev, &dev_attr_online);
              device_remove_groups(dev, dev->groups);
      
              if (type)
                      device_remove_groups(dev, type->groups);
      
              if (class)
                      device_remove_groups(dev, class->dev_groups);
      }
      
      static ssize_t dev_show(struct device *dev, struct device_attribute *attr,
                              char *buf)
      {
              return print_dev_t(buf, dev->devt);
      }
      static DEVICE_ATTR_RO(dev);
      
      /* /sys/devices/ */
      struct kset *devices_kset;
      
      /**
       * devices_kset_move_before - Move device in the devices_kset's list.
       * @deva: Device to move.
       * @devb: Device @deva should come before.
       */
      static void devices_kset_move_before(struct device *deva, struct device *devb)
      {
              if (!devices_kset)
                      return;
              pr_debug("devices_kset: Moving %s before %s\n",
                       dev_name(deva), dev_name(devb));
              spin_lock(&devices_kset->list_lock);
              list_move_tail(&deva->kobj.entry, &devb->kobj.entry);
              spin_unlock(&devices_kset->list_lock);
      }
      
      /**
       * devices_kset_move_after - Move device in the devices_kset's list.
       * @deva: Device to move
       * @devb: Device @deva should come after.
       */
      static void devices_kset_move_after(struct device *deva, struct device *devb)
      {
              if (!devices_kset)
                      return;
              pr_debug("devices_kset: Moving %s after %s\n",
                       dev_name(deva), dev_name(devb));
              spin_lock(&devices_kset->list_lock);
              list_move(&deva->kobj.entry, &devb->kobj.entry);
              spin_unlock(&devices_kset->list_lock);
      }
      
      /**
       * devices_kset_move_last - move the device to the end of devices_kset's list.
       * @dev: device to move
       */
      void devices_kset_move_last(struct device *dev)
      {
              if (!devices_kset)
                      return;
              pr_debug("devices_kset: Moving %s to end of list\n", dev_name(dev));
              spin_lock(&devices_kset->list_lock);
              list_move_tail(&dev->kobj.entry, &devices_kset->list);
              spin_unlock(&devices_kset->list_lock);
      }
      
      /**
       * device_create_file - create sysfs attribute file for device.
       * @dev: device.
       * @attr: device attribute descriptor.
       */
      int device_create_file(struct device *dev,
                             const struct device_attribute *attr)
      {
              int error = 0;
      
              if (dev) {
                      WARN(((attr->attr.mode & S_IWUGO) && !attr->store),
                              "Attribute %s: write permission without 'store'\n",
                              attr->attr.name);
                      WARN(((attr->attr.mode & S_IRUGO) && !attr->show),
                              "Attribute %s: read permission without 'show'\n",
                              attr->attr.name);
                      error = sysfs_create_file(&dev->kobj, &attr->attr);
              }
      
              return error;
      }
      EXPORT_SYMBOL_GPL(device_create_file);
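      
      /*
       * Illustrative sketch, not part of the original file: the WARN()s above
       * trigger when an attribute's mode grants access that its callbacks
       * cannot honour, so a read-write attribute is expected to provide both a
       * show() and a store() method ("bar" is hypothetical):
       *
       *      static ssize_t bar_show(struct device *dev,
       *                              struct device_attribute *attr, char *buf)
       *      {
       *              return sysfs_emit(buf, "%d\n", 0);
       *      }
       *
       *      static ssize_t bar_store(struct device *dev,
       *                               struct device_attribute *attr,
       *                               const char *buf, size_t count)
       *      {
       *              return count;
       *      }
       *      static DEVICE_ATTR_RW(bar);
       *
       *      error = device_create_file(dev, &dev_attr_bar);
       */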
      
      /**
       * device_remove_file - remove sysfs attribute file.
       * @dev: device.
       * @attr: device attribute descriptor.
       */
      void device_remove_file(struct device *dev,
                              const struct device_attribute *attr)
      {
              if (dev)
                      sysfs_remove_file(&dev->kobj, &attr->attr);
      }
      EXPORT_SYMBOL_GPL(device_remove_file);
      
      /**
       * device_remove_file_self - remove sysfs attribute file from its own method.
       * @dev: device.
       * @attr: device attribute descriptor.
       *
       * See kernfs_remove_self() for details.
       */
      bool device_remove_file_self(struct device *dev,
                                   const struct device_attribute *attr)
      {
              if (dev)
                      return sysfs_remove_file_self(&dev->kobj, &attr->attr);
              else
                      return false;
      }
      EXPORT_SYMBOL_GPL(device_remove_file_self);
      
      /**
       * device_create_bin_file - create sysfs binary attribute file for device.
       * @dev: device.
       * @attr: device binary attribute descriptor.
       */
      int device_create_bin_file(struct device *dev,
                                 const struct bin_attribute *attr)
      {
              int error = -EINVAL;
              if (dev)
                      error = sysfs_create_bin_file(&dev->kobj, attr);
              return error;
      }
      EXPORT_SYMBOL_GPL(device_create_bin_file);
      
      /**
       * device_remove_bin_file - remove sysfs binary attribute file
       * @dev: device.
       * @attr: device binary attribute descriptor.
       */
      void device_remove_bin_file(struct device *dev,
                                  const struct bin_attribute *attr)
      {
              if (dev)
                      sysfs_remove_bin_file(&dev->kobj, attr);
      }
      EXPORT_SYMBOL_GPL(device_remove_bin_file);
      
      static void klist_children_get(struct klist_node *n)
      {
              struct device_private *p = to_device_private_parent(n);
              struct device *dev = p->device;
      
              get_device(dev);
      }
      
      static void klist_children_put(struct klist_node *n)
      {
              struct device_private *p = to_device_private_parent(n);
              struct device *dev = p->device;
      
              put_device(dev);
      }
      
      /**
       * device_initialize - init device structure.
       * @dev: device.
       *
       * This prepares the device for use by other layers by initializing
       * its fields.
       * It is the first half of device_register(), if called by
       * that function, though it can also be called separately, so one
       * may use @dev's fields. In particular, get_device()/put_device()
       * may be used for reference counting of @dev after calling this
       * function.
       *
       * All fields in @dev must be initialized by the caller to 0, except
       * for those explicitly set to some other value.  The simplest
       * approach is to use kzalloc() to allocate the structure containing
       * @dev.
       *
       * NOTE: Use put_device() to give up your reference instead of freeing
       * @dev directly once you have called this function.
       */
      void device_initialize(struct device *dev)
      {
              dev->kobj.kset = devices_kset;
              kobject_init(&dev->kobj, &device_ktype);
              INIT_LIST_HEAD(&dev->dma_pools);
              mutex_init(&dev->mutex);
      #ifdef CONFIG_PROVE_LOCKING
              mutex_init(&dev->lockdep_mutex);
      #endif
              lockdep_set_novalidate_class(&dev->mutex);
              spin_lock_init(&dev->devres_lock);
              INIT_LIST_HEAD(&dev->devres_head);
              device_pm_init(dev);
              set_dev_node(dev, -1);
      #ifdef CONFIG_GENERIC_MSI_IRQ
              INIT_LIST_HEAD(&dev->msi_list);
      #endif
              INIT_LIST_HEAD(&dev->links.consumers);
              INIT_LIST_HEAD(&dev->links.suppliers);
              INIT_LIST_HEAD(&dev->links.defer_sync);
              dev->links.status = DL_DEV_NO_DRIVER;
      }
      EXPORT_SYMBOL_GPL(device_initialize);
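      
      /*
       * Illustrative sketch, not part of the original file: the split
       * device_initialize()/device_add() registration the comment above
       * allows, with put_device() doing all of the error unwinding once
       * device_initialize() has run ("struct foo", foo_release() and "id" are
       * hypothetical):
       *
       *      foo = kzalloc(sizeof(*foo), GFP_KERNEL);
       *      if (!foo)
       *              return -ENOMEM;
       *
       *      device_initialize(&foo->dev);
       *      foo->dev.release = foo_release;
       *      dev_set_name(&foo->dev, "foo%d", id);
       *
       *      err = device_add(&foo->dev);
       *      if (err) {
       *              put_device(&foo->dev);
       *              return err;
       *      }
       */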
      
      struct kobject *virtual_device_parent(struct device *dev)
      {
              static struct kobject *virtual_dir = NULL;
      
              if (!virtual_dir)
                      virtual_dir = kobject_create_and_add("virtual",
                                                           &devices_kset->kobj);
      
              return virtual_dir;
      }
      
      struct class_dir {
              struct kobject kobj;
              struct class *class;
      };
      
      #define to_class_dir(obj) container_of(obj, struct class_dir, kobj)
      
      static void class_dir_release(struct kobject *kobj)
      {
              struct class_dir *dir = to_class_dir(kobj);
              kfree(dir);
      }
      
      static const
      struct kobj_ns_type_operations *class_dir_child_ns_type(struct kobject *kobj)
      {
              struct class_dir *dir = to_class_dir(kobj);
              return dir->class->ns_type;
      }
      
      static struct kobj_type class_dir_ktype = {
              .release        = class_dir_release,
              .sysfs_ops        = &kobj_sysfs_ops,
              .child_ns_type        = class_dir_child_ns_type
      };
      
      static struct kobject *
      class_dir_create_and_add(struct class *class, struct kobject *parent_kobj)
      {
              struct class_dir *dir;
              int retval;
      
              dir = kzalloc(sizeof(*dir), GFP_KERNEL);
              if (!dir)
                      return ERR_PTR(-ENOMEM);
      
              dir->class = class;
              kobject_init(&dir->kobj, &class_dir_ktype);
      
              dir->kobj.kset = &class->p->glue_dirs;
      
              retval = kobject_add(&dir->kobj, parent_kobj, "%s", class->name);
              if (retval < 0) {
                      kobject_put(&dir->kobj);
                      return ERR_PTR(retval);
              }
              return &dir->kobj;
      }
      
      static DEFINE_MUTEX(gdp_mutex);
      
      static struct kobject *get_device_parent(struct device *dev,
                                               struct device *parent)
      {
              if (dev->class) {
                      struct kobject *kobj = NULL;
                      struct kobject *parent_kobj;
                      struct kobject *k;
      
      #ifdef CONFIG_BLOCK
                      /* block disks show up in /sys/block */
                      if (sysfs_deprecated && dev->class == &block_class) {
                              if (parent && parent->class == &block_class)
                                      return &parent->kobj;
                              return &block_class.p->subsys.kobj;
                      }
      #endif
      
		/*
		 * If we have no parent, we live in "virtual".
		 * Class devices with a non-class-device parent live in a
		 * "glue" directory to prevent namespace collisions.
		 */
                      if (parent == NULL)
                              parent_kobj = virtual_device_parent(dev);
                      else if (parent->class && !dev->class->ns_type)
                              return &parent->kobj;
                      else
                              parent_kobj = &parent->kobj;
      
                      mutex_lock(&gdp_mutex);
      
                      /* find our class-directory at the parent and reference it */
                      spin_lock(&dev->class->p->glue_dirs.list_lock);
                      list_for_each_entry(k, &dev->class->p->glue_dirs.list, entry)
                              if (k->parent == parent_kobj) {
                                      kobj = kobject_get(k);
                                      break;
                              }
                      spin_unlock(&dev->class->p->glue_dirs.list_lock);
                      if (kobj) {
                              mutex_unlock(&gdp_mutex);
                              return kobj;
                      }
      
                      /* or create a new class-directory at the parent device */
                      k = class_dir_create_and_add(dev->class, parent_kobj);
                      /* do not emit an uevent for this simple "glue" directory */
                      mutex_unlock(&gdp_mutex);
                      return k;
              }
      
              /* subsystems can specify a default root directory for their devices */
              if (!parent && dev->bus && dev->bus->dev_root)
                      return &dev->bus->dev_root->kobj;
      
              if (parent)
                      return &parent->kobj;
              return NULL;
      }
      
      static inline bool live_in_glue_dir(struct kobject *kobj,
                                          struct device *dev)
      {
              if (!kobj || !dev->class ||
                  kobj->kset != &dev->class->p->glue_dirs)
                      return false;
              return true;
      }
      
      static inline struct kobject *get_glue_dir(struct device *dev)
      {
              return dev->kobj.parent;
      }
      
/*
 * Make sure cleaning up the glue dir is the last step; the kobject's
 * .release handler must run while holding the global gdp_mutex.
 */
      static void cleanup_glue_dir(struct device *dev, struct kobject *glue_dir)
      {
              unsigned int ref;
      
              /* see if we live in a "glue" directory */
              if (!live_in_glue_dir(glue_dir, dev))
                      return;
      
              mutex_lock(&gdp_mutex);
	/*
               * There is a race condition between removing glue directory
               * and adding a new device under the glue directory.
               *
               * CPU1:                                         CPU2:
               *
               * device_add()
               *   get_device_parent()
               *     class_dir_create_and_add()
               *       kobject_add_internal()
               *         create_dir()    // create glue_dir
               *
               *                                               device_add()
               *                                                 get_device_parent()
               *                                                   kobject_get() // get glue_dir
               *
               * device_del()
               *   cleanup_glue_dir()
               *     kobject_del(glue_dir)
               *
               *                                               kobject_add()
               *                                                 kobject_add_internal()
               *                                                   create_dir() // in glue_dir
               *                                                     sysfs_create_dir_ns()
               *                                                       kernfs_create_dir_ns(sd)
               *
               *       sysfs_remove_dir() // glue_dir->sd=NULL
               *       sysfs_put()        // free glue_dir->sd
               *
               *                                                         // sd is freed
               *                                                         kernfs_new_node(sd)
               *                                                           kernfs_get(glue_dir)
               *                                                           kernfs_add_one()
               *                                                           kernfs_put()
               *
	 * If CPU2 adds a new device under the glue dir before CPU1 removes
	 * the last child device under it, the glue_dir kobject reference
	 * count is increased to 2 in kobject_get(k), and CPU2 has already
	 * called kernfs_create_dir_ns(). Meanwhile, CPU1 calls
	 * sysfs_remove_dir() and sysfs_put(), which frees glue_dir->sd.
	 *
	 * CPU2 then sees a stale "empty", but still potentially used,
	 * glue dir in kernfs_new_node().
	 *
	 * To avoid this, make sure that the kernfs_node for glue_dir is
	 * released on CPU1 only when the refcount of the glue_dir kobject
	 * is 1.
               */
              ref = kref_read(&glue_dir->kref);
              if (!kobject_has_children(glue_dir) && !--ref)
                      kobject_del(glue_dir);
              kobject_put(glue_dir);
              mutex_unlock(&gdp_mutex);
      }
      
      static int device_add_class_symlinks(struct device *dev)
      {
              struct device_node *of_node = dev_of_node(dev);
              int error;
      
              if (of_node) {
                      error = sysfs_create_link(&dev->kobj, of_node_kobj(of_node), "of_node");
                      if (error)
			dev_warn(dev, "Error %d creating of_node link\n", error);
                      /* An error here doesn't warrant bringing down the device */
              }
      
              if (!dev->class)
                      return 0;
      
              error = sysfs_create_link(&dev->kobj,
                                        &dev->class->p->subsys.kobj,
                                        "subsystem");
              if (error)
                      goto out_devnode;
      
              if (dev->parent && device_is_not_partition(dev)) {
                      error = sysfs_create_link(&dev->kobj, &dev->parent->kobj,
                                                "device");
                      if (error)
                              goto out_subsys;
              }
      
      #ifdef CONFIG_BLOCK
              /* /sys/block has directories and does not need symlinks */
              if (sysfs_deprecated && dev->class == &block_class)
                      return 0;
      #endif
      
              /* link in the class directory pointing to the device */
              error = sysfs_create_link(&dev->class->p->subsys.kobj,
                                        &dev->kobj, dev_name(dev));
              if (error)
                      goto out_device;
      
              return 0;
      
      out_device:
              sysfs_remove_link(&dev->kobj, "device");
      
      out_subsys:
              sysfs_remove_link(&dev->kobj, "subsystem");
      out_devnode:
              sysfs_remove_link(&dev->kobj, "of_node");
              return error;
      }
      
      static void device_remove_class_symlinks(struct device *dev)
      {
              if (dev_of_node(dev))
                      sysfs_remove_link(&dev->kobj, "of_node");
      
              if (!dev->class)
                      return;
      
              if (dev->parent && device_is_not_partition(dev))
                      sysfs_remove_link(&dev->kobj, "device");
              sysfs_remove_link(&dev->kobj, "subsystem");
      #ifdef CONFIG_BLOCK
              if (sysfs_deprecated && dev->class == &block_class)
                      return;
      #endif
              sysfs_delete_link(&dev->class->p->subsys.kobj, &dev->kobj, dev_name(dev));
      }
      
      /**
       * dev_set_name - set a device name
       * @dev: device
       * @fmt: format string for the device's name
       */
      int dev_set_name(struct device *dev, const char *fmt, ...)
      {
              va_list vargs;
              int err;
      
	va_start(vargs, fmt);
              err = kobject_set_name_vargs(&dev->kobj, fmt, vargs);
              va_end(vargs);
              return err;
      }
      EXPORT_SYMBOL_GPL(dev_set_name);
      
      /**
       * device_to_dev_kobj - select a /sys/dev/ directory for the device
       * @dev: device
       *
 * By default we select char/ for new entries.  Setting class->dev_kobj
       * to NULL prevents an entry from being created.  class->dev_kobj must
       * be set (or cleared) before any devices are registered to the class
       * otherwise device_create_sys_dev_entry() and
       * device_remove_sys_dev_entry() will disagree about the presence of
       * the link.
       */
      static struct kobject *device_to_dev_kobj(struct device *dev)
      {
              struct kobject *kobj;
      
              if (dev->class)
                      kobj = dev->class->dev_kobj;
              else
                      kobj = sysfs_dev_char_kobj;
      
              return kobj;
      }
      
      static int device_create_sys_dev_entry(struct device *dev)
      {
              struct kobject *kobj = device_to_dev_kobj(dev);
              int error = 0;
              char devt_str[15];
      
              if (kobj) {
                      format_dev_t(devt_str, dev->devt);
                      error = sysfs_create_link(kobj, &dev->kobj, devt_str);
              }
      
              return error;
      }
      
      static void device_remove_sys_dev_entry(struct device *dev)
      {
              struct kobject *kobj = device_to_dev_kobj(dev);
              char devt_str[15];
      
              if (kobj) {
                      format_dev_t(devt_str, dev->devt);
                      sysfs_remove_link(kobj, devt_str);
              }
      }
      
      static int device_private_init(struct device *dev)
      {
              dev->p = kzalloc(sizeof(*dev->p), GFP_KERNEL);
              if (!dev->p)
                      return -ENOMEM;
              dev->p->device = dev;
              klist_init(&dev->p->klist_children, klist_children_get,
                         klist_children_put);
              INIT_LIST_HEAD(&dev->p->deferred_probe);
              return 0;
      }
      
      /**
       * device_add - add device to device hierarchy.
       * @dev: device.
       *
       * This is part 2 of device_register(), though may be called
       * separately _iff_ device_initialize() has been called separately.
       *
       * This adds @dev to the kobject hierarchy via kobject_add(), adds it
       * to the global and sibling lists for the device, then
       * adds it to the other relevant subsystems of the driver model.
       *
       * Do not call this routine or device_register() more than once for
       * any device structure.  The driver model core is not designed to work
       * with devices that get unregistered and then spring back to life.
       * (Among other things, it's very hard to guarantee that all references
       * to the previous incarnation of @dev have been dropped.)  Allocate
       * and register a fresh new struct device instead.
       *
       * NOTE: _Never_ directly free @dev after calling this function, even
       * if it returned an error! Always use put_device() to give up your
       * reference instead.
       *
       * Rule of thumb is: if device_add() succeeds, you should call
       * device_del() when you want to get rid of it. If device_add() has
       * *not* succeeded, use *only* put_device() to drop the reference
       * count.
       */
      int device_add(struct device *dev)
      {
              struct device *parent;
              struct kobject *kobj;
              struct class_interface *class_intf;
              int error = -EINVAL;
              struct kobject *glue_dir = NULL;
      
              dev = get_device(dev);
              if (!dev)
                      goto done;
      
              if (!dev->p) {
                      error = device_private_init(dev);
                      if (error)
                              goto done;
              }
      
              /*
               * for statically allocated devices, which should all be converted
               * some day, we need to initialize the name. We prevent reading back
               * the name, and force the use of dev_name()
               */
              if (dev->init_name) {
                      dev_set_name(dev, "%s", dev->init_name);
                      dev->init_name = NULL;
              }
      
              /* subsystems can specify simple device enumeration */
              if (!dev_name(dev) && dev->bus && dev->bus->dev_name)
                      dev_set_name(dev, "%s%u", dev->bus->dev_name, dev->id);
      
              if (!dev_name(dev)) {
                      error = -EINVAL;
                      goto name_error;
              }
      
              pr_debug("device: '%s': %s\n", dev_name(dev), __func__);
      
              parent = get_device(dev->parent);
              kobj = get_device_parent(dev, parent);
              if (IS_ERR(kobj)) {
                      error = PTR_ERR(kobj);
                      goto parent_error;
              }
              if (kobj)
                      dev->kobj.parent = kobj;
      
              /* use parent numa_node */
              if (parent && (dev_to_node(dev) == NUMA_NO_NODE))
                      set_dev_node(dev, dev_to_node(parent));
      
              /* first, register with generic layer. */
              /* we require the name to be set before, and pass NULL */
              error = kobject_add(&dev->kobj, dev->kobj.parent, NULL);
              if (error) {
                      glue_dir = get_glue_dir(dev);
                      goto Error;
              }
      
              /* notify platform of device entry */
              error = device_platform_notify(dev, KOBJ_ADD);
              if (error)
                      goto platform_error;
      
              error = device_create_file(dev, &dev_attr_uevent);
              if (error)
                      goto attrError;
      
              error = device_add_class_symlinks(dev);
              if (error)
                      goto SymlinkError;
              error = device_add_attrs(dev);
              if (error)
                      goto AttrsError;
              error = bus_add_device(dev);
              if (error)
                      goto BusError;
              error = dpm_sysfs_add(dev);
              if (error)
                      goto DPMError;
              device_pm_add(dev);
      
              if (MAJOR(dev->devt)) {
                      error = device_create_file(dev, &dev_attr_dev);
                      if (error)
                              goto DevAttrError;
      
                      error = device_create_sys_dev_entry(dev);
                      if (error)
                              goto SysEntryError;
      
                      devtmpfs_create_node(dev);
              }
      
              /* Notify clients of device addition.  This call must come
               * after dpm_sysfs_add() and before kobject_uevent().
               */
              if (dev->bus)
                      blocking_notifier_call_chain(&dev->bus->p->bus_notifier,
                                                   BUS_NOTIFY_ADD_DEVICE, dev);
      
              kobject_uevent(&dev->kobj, KOBJ_ADD);
      
              /*
               * Check if any of the other devices (consumers) have been waiting for
               * this device (supplier) to be added so that they can create a device
               * link to it.
               *
               * This needs to happen after device_pm_add() because device_link_add()
               * requires the supplier be registered before it's called.
               *
               * But this also needs to happen before bus_probe_device() to make sure
               * waiting consumers can link to it before the driver is bound to the
               * device and the driver sync_state callback is called for this device.
               */
              if (dev->fwnode && !dev->fwnode->dev) {
                      dev->fwnode->dev = dev;
                      fw_devlink_link_device(dev);
              }
      
              bus_probe_device(dev);
              if (parent)
                      klist_add_tail(&dev->p->knode_parent,
                                     &parent->p->klist_children);
      
              if (dev->class) {
                      mutex_lock(&dev->class->p->mutex);
                      /* tie the class to the device */
                      klist_add_tail(&dev->p->knode_class,
                                     &dev->class->p->klist_devices);
      
                      /* notify any interfaces that the device is here */
                      list_for_each_entry(class_intf,
                                          &dev->class->p->interfaces, node)
                              if (class_intf->add_dev)
                                      class_intf->add_dev(dev, class_intf);
                      mutex_unlock(&dev->class->p->mutex);
              }
      done:
              put_device(dev);
              return error;
       SysEntryError:
              if (MAJOR(dev->devt))
                      device_remove_file(dev, &dev_attr_dev);
       DevAttrError:
              device_pm_remove(dev);
              dpm_sysfs_remove(dev);
       DPMError:
              bus_remove_device(dev);
       BusError:
              device_remove_attrs(dev);
       AttrsError:
              device_remove_class_symlinks(dev);
       SymlinkError:
              device_remove_file(dev, &dev_attr_uevent);
       attrError:
              device_platform_notify(dev, KOBJ_REMOVE);
      platform_error:
              kobject_uevent(&dev->kobj, KOBJ_REMOVE);
              glue_dir = get_glue_dir(dev);
              kobject_del(&dev->kobj);
       Error:
              cleanup_glue_dir(dev, glue_dir);
      parent_error:
              put_device(parent);
      name_error:
              kfree(dev->p);
              dev->p = NULL;
              goto done;
      }
      EXPORT_SYMBOL_GPL(device_add);
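
/*
 * Illustrative sketch (not compiled): the error-handling rule of thumb from
 * the kerneldoc above, continuing the hypothetical foo_device example.  On
 * failure the embedded kobject still holds the last reference, so the
 * structure must be released with put_device(), never kfree().
 */
#if 0
static int foo_device_publish(struct foo_device *foo)
{
	int error;

	error = dev_set_name(&foo->dev, "foo%d", foo->id);
	if (error)
		goto err;

	error = device_add(&foo->dev);
	if (error)
		goto err;		/* do NOT call device_del() here */

	return 0;
err:
	put_device(&foo->dev);		/* drops the reference, ->release frees */
	return error;
}

static void foo_device_unpublish(struct foo_device *foo)
{
	device_del(&foo->dev);		/* only because device_add() succeeded */
	put_device(&foo->dev);
}
#endif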
      
      /**
       * device_register - register a device with the system.
       * @dev: pointer to the device structure
       *
       * This happens in two clean steps - initialize the device
       * and add it to the system. The two steps can be called
       * separately, but this is the easiest and most common.
 * I.e. you should only call the two helpers separately if you
 * have a clearly defined need to use and refcount the device
       * before it is added to the hierarchy.
       *
       * For more information, see the kerneldoc for device_initialize()
       * and device_add().
       *
       * NOTE: _Never_ directly free @dev after calling this function, even
       * if it returned an error! Always use put_device() to give up the
       * reference initialized in this function instead.
       */
      int device_register(struct device *dev)
      {
              device_initialize(dev);
              return device_add(dev);
      }
      EXPORT_SYMBOL_GPL(device_register);
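
/*
 * Illustrative sketch (not compiled): the common single-call path.  A
 * hypothetical "bar" driver registers a dynamically allocated device and
 * later tears it down with device_unregister(); even on failure, only
 * put_device() is used to free it.
 */
#if 0
static void bar_release(struct device *dev)
{
	kfree(dev);
}

static struct device *bar_dev;

static int bar_init(void)
{
	int error;

	bar_dev = kzalloc(sizeof(*bar_dev), GFP_KERNEL);
	if (!bar_dev)
		return -ENOMEM;

	bar_dev->release = bar_release;
	dev_set_name(bar_dev, "bar");

	error = device_register(bar_dev);
	if (error) {
		/* never kfree() here; the release callback will run instead */
		put_device(bar_dev);
		return error;
	}
	return 0;
}

static void bar_exit(void)
{
	device_unregister(bar_dev);	/* device_del() + put_device() */
}
#endif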
      
      /**
       * get_device - increment reference count for device.
       * @dev: device.
       *
       * This simply forwards the call to kobject_get(), though
       * we do take care to provide for the case that we get a NULL
       * pointer passed in.
       */
      struct device *get_device(struct device *dev)
      {
              return dev ? kobj_to_dev(kobject_get(&dev->kobj)) : NULL;
      }
      EXPORT_SYMBOL_GPL(get_device);
      
      /**
       * put_device - decrement reference count.
       * @dev: device in question.
       */
      void put_device(struct device *dev)
      {
              /* might_sleep(); */
	if (dev)
		kobject_put(&dev->kobj);
      }
      EXPORT_SYMBOL_GPL(put_device);
      
      bool kill_device(struct device *dev)
      {
              /*
               * Require the device lock and set the "dead" flag to guarantee that
               * the update behavior is consistent with the other bitfields near
               * it and that we cannot have an asynchronous probe routine trying
               * to run while we are tearing out the bus/class/sysfs from
               * underneath the device.
               */
              lockdep_assert_held(&dev->mutex);
      
              if (dev->p->dead)
                      return false;
              dev->p->dead = true;
              return true;
      }
      EXPORT_SYMBOL_GPL(kill_device);
      
      /**
       * device_del - delete device from system.
       * @dev: device.
       *
       * This is the first part of the device unregistration
       * sequence. This removes the device from the lists we control
       * from here, has it removed from the other driver model
       * subsystems it was added to in device_add(), and removes it
       * from the kobject hierarchy.
       *
       * NOTE: this should be called manually _iff_ device_add() was
       * also called manually.
       */
      void device_del(struct device *dev)
      {
              struct device *parent = dev->parent;
              struct kobject *glue_dir = NULL;
              struct class_interface *class_intf;
              unsigned int noio_flag;
      
              device_lock(dev);
              kill_device(dev);
              device_unlock(dev);
      
              if (dev->fwnode && dev->fwnode->dev == dev)
                      dev->fwnode->dev = NULL;
      
              /* Notify clients of device removal.  This call must come
               * before dpm_sysfs_remove().
               */
              noio_flag = memalloc_noio_save();
              if (dev->bus)
                      blocking_notifier_call_chain(&dev->bus->p->bus_notifier,
                                                   BUS_NOTIFY_DEL_DEVICE, dev);
      
              dpm_sysfs_remove(dev);
              if (parent)
                      klist_del(&dev->p->knode_parent);
              if (MAJOR(dev->devt)) {
                      devtmpfs_delete_node(dev);
                      device_remove_sys_dev_entry(dev);
                      device_remove_file(dev, &dev_attr_dev);
              }
              if (dev->class) {
                      device_remove_class_symlinks(dev);
      
                      mutex_lock(&dev->class->p->mutex);
                      /* notify any interfaces that the device is now gone */
                      list_for_each_entry(class_intf,
                                          &dev->class->p->interfaces, node)
                              if (class_intf->remove_dev)
                                      class_intf->remove_dev(dev, class_intf);
                      /* remove the device from the class list */
                      klist_del(&dev->p->knode_class);
                      mutex_unlock(&dev->class->p->mutex);
              }
              device_remove_file(dev, &dev_attr_uevent);
              device_remove_attrs(dev);
              bus_remove_device(dev);
              device_pm_remove(dev);
              driver_deferred_probe_del(dev);
              device_platform_notify(dev, KOBJ_REMOVE);
              device_remove_properties(dev);
              device_links_purge(dev);
      
              if (dev->bus)
                      blocking_notifier_call_chain(&dev->bus->p->bus_notifier,
                                                   BUS_NOTIFY_REMOVED_DEVICE, dev);
              kobject_uevent(&dev->kobj, KOBJ_REMOVE);
              glue_dir = get_glue_dir(dev);
              kobject_del(&dev->kobj);
              cleanup_glue_dir(dev, glue_dir);
              memalloc_noio_restore(noio_flag);
              put_device(parent);
      }
      EXPORT_SYMBOL_GPL(device_del);
      
      /**
       * device_unregister - unregister device from system.
       * @dev: device going away.
       *
       * We do this in two parts, like we do device_register(). First,
       * we remove it from all the subsystems with device_del(), then
       * we decrement the reference count via put_device(). If that
       * is the final reference count, the device will be cleaned up
       * via device_release() above. Otherwise, the structure will
       * stick around until the final reference to the device is dropped.
       */
      void device_unregister(struct device *dev)
      {
              pr_debug("device: '%s': %s\n", dev_name(dev), __func__);
              device_del(dev);
              put_device(dev);
      }
      EXPORT_SYMBOL_GPL(device_unregister);
      
      static struct device *prev_device(struct klist_iter *i)
      {
              struct klist_node *n = klist_prev(i);
              struct device *dev = NULL;
              struct device_private *p;
      
              if (n) {
                      p = to_device_private_parent(n);
                      dev = p->device;
              }
              return dev;
      }
      
      static struct device *next_device(struct klist_iter *i)
      {
              struct klist_node *n = klist_next(i);
              struct device *dev = NULL;
              struct device_private *p;
      
              if (n) {
                      p = to_device_private_parent(n);
                      dev = p->device;
              }
              return dev;
      }
      
      /**
       * device_get_devnode - path of device node file
       * @dev: device
       * @mode: returned file access mode
       * @uid: returned file owner
       * @gid: returned file group
       * @tmp: possibly allocated string
       *
       * Return the relative path of a possible device node.
 * Non-default names may need to allocate memory to compose a
 * name. This memory is returned in @tmp and needs to be freed
 * by the caller.
       */
      const char *device_get_devnode(struct device *dev,
                                     umode_t *mode, kuid_t *uid, kgid_t *gid,
                                     const char **tmp)
      {
              char *s;
      
              *tmp = NULL;
      
              /* the device type may provide a specific name */
              if (dev->type && dev->type->devnode)
                      *tmp = dev->type->devnode(dev, mode, uid, gid);
              if (*tmp)
                      return *tmp;
      
              /* the class may provide a specific name */
              if (dev->class && dev->class->devnode)
                      *tmp = dev->class->devnode(dev, mode);
              if (*tmp)
                      return *tmp;
      
              /* return name without allocation, tmp == NULL */
              if (strchr(dev_name(dev), '!') == NULL)
                      return dev_name(dev);
      
              /* replace '!' in the name with '/' */
              s = kstrdup(dev_name(dev), GFP_KERNEL);
              if (!s)
                      return NULL;
              strreplace(s, '!', '/');
              return *tmp = s;
      }
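
/*
 * Illustrative sketch (not compiled): a class ->devnode() callback as
 * consumed by device_get_devnode() above.  "baz_class" and "baz_devnode"
 * are hypothetical; returning a kmalloc'ed string is what makes *tmp above
 * need freeing, and a '!' in the device name becomes a '/' in the path.
 */
#if 0
static char *baz_devnode(struct device *dev, umode_t *mode)
{
	if (mode)
		*mode = 0600;
	/* place nodes under a subdirectory: /dev/baz/<name> */
	return kasprintf(GFP_KERNEL, "baz/%s", dev_name(dev));
}

static struct class baz_class = {
	.name		= "baz",
	.devnode	= baz_devnode,
};
#endif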
      
      /**
       * device_for_each_child - device child iterator.
       * @parent: parent struct device.
       * @fn: function to be called for each device.
       * @data: data for the callback.
       *
       * Iterate over @parent's child devices, and call @fn for each,
       * passing it @data.
       *
       * We check the return of @fn each time. If it returns anything
       * other than 0, we break out and return that value.
       */
      int device_for_each_child(struct device *parent, void *data,
                                int (*fn)(struct device *dev, void *data))
      {
              struct klist_iter i;
              struct device *child;
              int error = 0;
      
              if (!parent->p)
                      return 0;
      
              klist_iter_init(&parent->p->klist_children, &i);
              while (!error && (child = next_device(&i)))
                      error = fn(child, data);
              klist_iter_exit(&i);
              return error;
      }
      EXPORT_SYMBOL_GPL(device_for_each_child);
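
/*
 * Illustrative sketch (not compiled): a child iterator callback.  A
 * non-zero return value stops the walk and is propagated, so returning 0
 * here visits every child.  "qux_*" names are hypothetical.
 */
#if 0
static int qux_count_child(struct device *dev, void *data)
{
	unsigned int *count = data;

	(*count)++;
	return 0;			/* 0 means keep iterating */
}

static unsigned int qux_count_children(struct device *parent)
{
	unsigned int count = 0;

	device_for_each_child(parent, &count, qux_count_child);
	return count;
}
#endif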
      
      /**
       * device_for_each_child_reverse - device child iterator in reversed order.
       * @parent: parent struct device.
       * @fn: function to be called for each device.
       * @data: data for the callback.
       *
       * Iterate over @parent's child devices, and call @fn for each,
       * passing it @data.
       *
       * We check the return of @fn each time. If it returns anything
       * other than 0, we break out and return that value.
       */
      int device_for_each_child_reverse(struct device *parent, void *data,
                                        int (*fn)(struct device *dev, void *data))
      {
              struct klist_iter i;
              struct device *child;
              int error = 0;
      
              if (!parent->p)
                      return 0;
      
              klist_iter_init(&parent->p->klist_children, &i);
              while ((child = prev_device(&i)) && !error)
                      error = fn(child, data);
              klist_iter_exit(&i);
              return error;
      }
      EXPORT_SYMBOL_GPL(device_for_each_child_reverse);
      
      /**
       * device_find_child - device iterator for locating a particular device.
       * @parent: parent struct device
       * @match: Callback function to check device
       * @data: Data to pass to match function
       *
       * This is similar to the device_for_each_child() function above, but it
       * returns a reference to a device that is 'found' for later use, as
       * determined by the @match callback.
       *
       * The callback should return 0 if the device doesn't match and non-zero
       * if it does.  If the callback returns non-zero and a reference to the
       * current device can be obtained, this function will return to the caller
       * and not iterate over any more devices.
       *
       * NOTE: you will need to drop the reference with put_device() after use.
       */
      struct device *device_find_child(struct device *parent, void *data,
                                       int (*match)(struct device *dev, void *data))
      {
              struct klist_iter i;
              struct device *child;
      
              if (!parent)
                      return NULL;
      
              klist_iter_init(&parent->p->klist_children, &i);
              while ((child = next_device(&i)))
                      if (match(child, data) && get_device(child))
                              break;
              klist_iter_exit(&i);
              return child;
      }
      EXPORT_SYMBOL_GPL(device_find_child);
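
/*
 * Illustrative sketch (not compiled): a match callback for
 * device_find_child().  The reference taken on a match must eventually be
 * dropped with put_device().  "quux_*" names are hypothetical.
 */
#if 0
static int quux_match_devt(struct device *dev, void *data)
{
	dev_t *devt = data;

	return dev->devt == *devt;	/* non-zero means "found" */
}

static struct device *quux_lookup(struct device *parent, dev_t devt)
{
	struct device *child;

	child = device_find_child(parent, &devt, quux_match_devt);
	/* caller must put_device(child) when done, if non-NULL */
	return child;
}
#endif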
      
      /**
       * device_find_child_by_name - device iterator for locating a child device.
       * @parent: parent struct device
       * @name: name of the child device
       *
       * This is similar to the device_find_child() function above, but it
       * returns a reference to a device that has the name @name.
       *
       * NOTE: you will need to drop the reference with put_device() after use.
       */
      struct device *device_find_child_by_name(struct device *parent,
                                               const char *name)
      {
              struct klist_iter i;
              struct device *child;
      
              if (!parent)
                      return NULL;
      
              klist_iter_init(&parent->p->klist_children, &i);
              while ((child = next_device(&i)))
                      if (sysfs_streq(dev_name(child), name) && get_device(child))
                              break;
              klist_iter_exit(&i);
              return child;
      }
      EXPORT_SYMBOL_GPL(device_find_child_by_name);
      
      int __init devices_init(void)
      {
              devices_kset = kset_create_and_add("devices", &device_uevent_ops, NULL);
              if (!devices_kset)
                      return -ENOMEM;
              dev_kobj = kobject_create_and_add("dev", NULL);
              if (!dev_kobj)
                      goto dev_kobj_err;
              sysfs_dev_block_kobj = kobject_create_and_add("block", dev_kobj);
              if (!sysfs_dev_block_kobj)
                      goto block_kobj_err;
              sysfs_dev_char_kobj = kobject_create_and_add("char", dev_kobj);
              if (!sysfs_dev_char_kobj)
                      goto char_kobj_err;
      
              return 0;
      
       char_kobj_err:
              kobject_put(sysfs_dev_block_kobj);
       block_kobj_err:
              kobject_put(dev_kobj);
       dev_kobj_err:
              kset_unregister(devices_kset);
              return -ENOMEM;
      }
      
      static int device_check_offline(struct device *dev, void *not_used)
      {
              int ret;
      
              ret = device_for_each_child(dev, NULL, device_check_offline);
              if (ret)
                      return ret;
      
              return device_supports_offline(dev) && !dev->offline ? -EBUSY : 0;
      }
      
      /**
       * device_offline - Prepare the device for hot-removal.
       * @dev: Device to be put offline.
       *
       * Execute the device bus type's .offline() callback, if present, to prepare
       * the device for a subsequent hot-removal.  If that succeeds, the device must
       * not be used until either it is removed or its bus type's .online() callback
       * is executed.
       *
       * Call under device_hotplug_lock.
       */
      int device_offline(struct device *dev)
      {
              int ret;
      
              if (dev->offline_disabled)
                      return -EPERM;
      
              ret = device_for_each_child(dev, NULL, device_check_offline);
              if (ret)
                      return ret;
      
              device_lock(dev);
              if (device_supports_offline(dev)) {
                      if (dev->offline) {
                              ret = 1;
                      } else {
                              ret = dev->bus->offline(dev);
                              if (!ret) {
                                      kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
                                      dev->offline = true;
                              }
                      }
              }
              device_unlock(dev);
      
              return ret;
      }
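
/*
 * Illustrative sketch (not compiled): offlining a device ahead of
 * hot-removal.  As the kerneldoc requires, the call is made under
 * device_hotplug_lock; a return value of 1 means the device was already
 * offline, which callers typically treat as success.  "corge_*" is a
 * hypothetical name.
 */
#if 0
static int corge_prepare_removal(struct device *dev)
{
	int ret;

	lock_device_hotplug();
	ret = device_offline(dev);
	unlock_device_hotplug();

	return ret < 0 ? ret : 0;	/* fold "already offline" into success */
}
#endif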
      
      /**
       * device_online - Put the device back online after successful device_offline().
       * @dev: Device to be put back online.
       *
       * If device_offline() has been successfully executed for @dev, but the device
       * has not been removed subsequently, execute its bus type's .online() callback
       * to indicate that the device can be used again.
       *
       * Call under device_hotplug_lock.
       */
      int device_online(struct device *dev)
      {
              int ret = 0;
      
              device_lock(dev);
              if (device_supports_offline(dev)) {
                      if (dev->offline) {
                              ret = dev->bus->online(dev);
                              if (!ret) {
                                      kobject_uevent(&dev->kobj, KOBJ_ONLINE);
                                      dev->offline = false;
                              }
                      } else {
                              ret = 1;
                      }
              }
              device_unlock(dev);
      
              return ret;
      }
      
      struct root_device {
              struct device dev;
              struct module *owner;
      };
      
      static inline struct root_device *to_root_device(struct device *d)
      {
              return container_of(d, struct root_device, dev);
      }
      
      static void root_device_release(struct device *dev)
      {
              kfree(to_root_device(dev));
      }
      
      /**
       * __root_device_register - allocate and register a root device
       * @name: root device name
       * @owner: owner module of the root device, usually THIS_MODULE
       *
       * This function allocates a root device and registers it
       * using device_register(). In order to free the returned
       * device, use root_device_unregister().
       *
       * Root devices are dummy devices which allow other devices
       * to be grouped under /sys/devices. Use this function to
       * allocate a root device and then use it as the parent of
       * any device which should appear under /sys/devices/{name}
       *
       * The /sys/devices/{name} directory will also contain a
       * 'module' symlink which points to the @owner directory
       * in sysfs.
       *
       * Returns &struct device pointer on success, or ERR_PTR() on error.
       *
       * Note: You probably want to use root_device_register().
       */
      struct device *__root_device_register(const char *name, struct module *owner)
      {
              struct root_device *root;
              int err = -ENOMEM;
      
              root = kzalloc(sizeof(struct root_device), GFP_KERNEL);
              if (!root)
                      return ERR_PTR(err);
      
              err = dev_set_name(&root->dev, "%s", name);
              if (err) {
                      kfree(root);
                      return ERR_PTR(err);
              }
      
              root->dev.release = root_device_release;
      
              err = device_register(&root->dev);
              if (err) {
                      put_device(&root->dev);
                      return ERR_PTR(err);
              }
      
      #ifdef CONFIG_MODULES        /* gotta find a "cleaner" way to do this */
              if (owner) {
                      struct module_kobject *mk = &owner->mkobj;
      
                      err = sysfs_create_link(&root->dev.kobj, &mk->kobj, "module");
                      if (err) {
                              device_unregister(&root->dev);
                              return ERR_PTR(err);
                      }
                      root->owner = owner;
              }
      #endif
      
              return &root->dev;
      }
      EXPORT_SYMBOL_GPL(__root_device_register);
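
/*
 * Illustrative sketch (not compiled): grouping devices that have no other
 * natural parent under /sys/devices/grault.  "grault" is a hypothetical
 * name; root_device_register() is the usual wrapper that passes
 * THIS_MODULE as @owner.
 */
#if 0
static struct device *grault_root;

static int grault_init(void)
{
	grault_root = root_device_register("grault");
	if (IS_ERR(grault_root))
		return PTR_ERR(grault_root);

	/* subsequent devices use ->parent = grault_root to appear below it */
	return 0;
}

static void grault_exit(void)
{
	root_device_unregister(grault_root);	/* also drops the reference */
}
#endif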
      
      /**
       * root_device_unregister - unregister and free a root device
       * @dev: device going away
       *
       * This function unregisters and cleans up a device that was created by
       * root_device_register().
       */
      void root_device_unregister(struct device *dev)
      {
              struct root_device *root = to_root_device(dev);
      
              if (root->owner)
                      sysfs_remove_link(&root->dev.kobj, "module");
      
              device_unregister(dev);
      }
      EXPORT_SYMBOL_GPL(root_device_unregister);
      
      
      static void device_create_release(struct device *dev)
      {
              pr_debug("device: '%s': %s\n", dev_name(dev), __func__);
              kfree(dev);
      }
      
      static __printf(6, 0) struct device *
      device_create_groups_vargs(struct class *class, struct device *parent,
                                 dev_t devt, void *drvdata,
                                 const struct attribute_group **groups,
                                 const char *fmt, va_list args)
      {
              struct device *dev = NULL;
              int retval = -ENODEV;
      
              if (class == NULL || IS_ERR(class))
                      goto error;
      
              dev = kzalloc(sizeof(*dev), GFP_KERNEL);
              if (!dev) {
                      retval = -ENOMEM;
                      goto error;
              }
      
              device_initialize(dev);
              dev->devt = devt;
              dev->class = class;
              dev->parent = parent;
              dev->groups = groups;
              dev->release = device_create_release;
              dev_set_drvdata(dev, drvdata);
      
              retval = kobject_set_name_vargs(&dev->kobj, fmt, args);
              if (retval)
                      goto error;
      
              retval = device_add(dev);
              if (retval)
                      goto error;
      
              return dev;
      
      error:
              put_device(dev);
              return ERR_PTR(retval);
      }
      
      /**
       * device_create - creates a device and registers it with sysfs
       * @class: pointer to the struct class that this device should be registered to
       * @parent: pointer to the parent struct device of this new device, if any
       * @devt: the dev_t for the char device to be added
       * @drvdata: the data to be added to the device for callbacks
       * @fmt: string for the device's name
       *
       * This function can be used by char device classes.  A struct device
       * will be created in sysfs, registered to the specified class.
       *
       * A "dev" file will be created, showing the dev_t for the device, if
       * the dev_t is not 0,0.
       * If a pointer to a parent struct device is passed in, the newly created
       * struct device will be a child of that device in sysfs.
       * The pointer to the struct device will be returned from the call.
       * Any further sysfs files that might be required can be created using this
       * pointer.
       *
       * Returns &struct device pointer on success, or ERR_PTR() on error.
       *
       * Note: the struct class passed to this function must have previously
       * been created with a call to class_create().
       */
      struct device *device_create(struct class *class, struct device *parent,
                                   dev_t devt, void *drvdata, const char *fmt, ...)
      {
              va_list vargs;
              struct device *dev;
      
              va_start(vargs, fmt);
              dev = device_create_groups_vargs(class, parent, devt, drvdata, NULL,
                                                fmt, vargs);
              va_end(vargs);
              return dev;
      }
      EXPORT_SYMBOL_GPL(device_create);
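
/*
 * Illustrative sketch (not compiled): the typical char-device pairing of
 * device_create()/device_destroy().  "garply_class" and "garply_major" are
 * hypothetical and assumed to come from class_create() and
 * register_chrdev() respectively.
 */
#if 0
static struct class *garply_class;
static int garply_major;

static struct device *garply_add_node(int minor, void *drvdata)
{
	/* creates /sys/class/garply/garply<minor> and a "dev" attribute */
	return device_create(garply_class, NULL, MKDEV(garply_major, minor),
			     drvdata, "garply%d", minor);
}

static void garply_remove_node(int minor)
{
	device_destroy(garply_class, MKDEV(garply_major, minor));
}
#endif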
      
      /**
       * device_create_with_groups - creates a device and registers it with sysfs
       * @class: pointer to the struct class that this device should be registered to
       * @parent: pointer to the parent struct device of this new device, if any
       * @devt: the dev_t for the char device to be added
       * @drvdata: the data to be added to the device for callbacks
       * @groups: NULL-terminated list of attribute groups to be created
       * @fmt: string for the device's name
       *
       * This function can be used by char device classes.  A struct device
       * will be created in sysfs, registered to the specified class.
       * Additional attributes specified in the groups parameter will also
       * be created automatically.
       *
       * A "dev" file will be created, showing the dev_t for the device, if
       * the dev_t is not 0,0.
       * If a pointer to a parent struct device is passed in, the newly created
       * struct device will be a child of that device in sysfs.
       * The pointer to the struct device will be returned from the call.
       * Any further sysfs files that might be required can be created using this
       * pointer.
       *
       * Returns &struct device pointer on success, or ERR_PTR() on error.
       *
       * Note: the struct class passed to this function must have previously
       * been created with a call to class_create().
       */
      struct device *device_create_with_groups(struct class *class,
                                               struct device *parent, dev_t devt,
                                               void *drvdata,
                                               const struct attribute_group **groups,
                                               const char *fmt, ...)
      {
              va_list vargs;
              struct device *dev;
      
              va_start(vargs, fmt);
              dev = device_create_groups_vargs(class, parent, devt, drvdata, groups,
                                               fmt, vargs);
              va_end(vargs);
              return dev;
      }
      EXPORT_SYMBOL_GPL(device_create_with_groups);
      
      /**
       * device_destroy - removes a device that was created with device_create()
       * @class: pointer to the struct class that this device was registered with
       * @devt: the dev_t of the device that was previously registered
       *
       * This call unregisters and cleans up a device that was created with a
       * call to device_create().
       */
      void device_destroy(struct class *class, dev_t devt)
      {
              struct device *dev;
      
              dev = class_find_device_by_devt(class, devt);
              if (dev) {
                      put_device(dev);
                      device_unregister(dev);
              }
      }
      EXPORT_SYMBOL_GPL(device_destroy);
      
      /**
       * device_rename - renames a device
       * @dev: the pointer to the struct device to be renamed
       * @new_name: the new name of the device
       *
       * It is the responsibility of the caller to provide mutual
       * exclusion between two different calls of device_rename
       * on the same device to ensure that new_name is valid and
       * won't conflict with other devices.
       *
       * Note: Don't call this function.  Currently, the networking layer calls this
       * function, but that will change.  The following text from Kay Sievers offers
       * some insight:
       *
       * Renaming devices is racy at many levels, symlinks and other stuff are not
       * replaced atomically, and you get a "move" uevent, but it's not easy to
       * connect the event to the old and new device. Device nodes are not renamed at
       * all, there isn't even support for that in the kernel now.
       *
       * In the meantime, during renaming, your target name might be taken by another
       * driver, creating conflicts. Or the old name is taken directly after you
       * renamed it -- then you get events for the same DEVPATH, before you even see
       * the "move" event. It's just a mess, and nothing new should ever rely on
       * kernel device renaming. Besides that, it's not even implemented now for
       * other things than (driver-core wise very simple) network devices.
       *
       * We are currently about to change network renaming in udev to completely
       * disallow renaming of devices in the same namespace as the kernel uses,
       * because we can't solve the problems properly, that arise with swapping names
       * of multiple interfaces without races. Means, renaming of eth[0-9]* will only
       * be allowed to some other name than eth[0-9]*, for the aforementioned
       * reasons.
       *
       * Make up a "real" name in the driver before you register anything, or add
       * some other attributes for userspace to find the device, or use udev to add
       * symlinks -- but never rename kernel devices later, it's a complete mess. We
       * don't even want to get into that and try to implement the missing pieces in
       * the core. We really have other pieces to fix in the driver core mess. :)
       */
      int device_rename(struct device *dev, const char *new_name)
      {
              struct kobject *kobj = &dev->kobj;
              char *old_device_name = NULL;
              int error;
      
              dev = get_device(dev);
              if (!dev)
                      return -EINVAL;
      
              dev_dbg(dev, "renaming to %s\n", new_name);
      
              old_device_name = kstrdup(dev_name(dev), GFP_KERNEL);
              if (!old_device_name) {
                      error = -ENOMEM;
                      goto out;
              }
      
              if (dev->class) {
                      error = sysfs_rename_link_ns(&dev->class->p->subsys.kobj,
                                                   kobj, old_device_name,
                                                   new_name, kobject_namespace(kobj));
                      if (error)
                              goto out;
              }
      
              error = kobject_rename(kobj, new_name);
              if (error)
                      goto out;
      
      out:
              put_device(dev);
      
              kfree(old_device_name);
      
              return error;
      }
      EXPORT_SYMBOL_GPL(device_rename);
      
      static int device_move_class_links(struct device *dev,
                                         struct device *old_parent,
                                         struct device *new_parent)
      {
              int error = 0;
      
              if (old_parent)
                      sysfs_remove_link(&dev->kobj, "device");
              if (new_parent)
                      error = sysfs_create_link(&dev->kobj, &new_parent->kobj,
                                                "device");
              return error;
      }
      
      /**
       * device_move - moves a device to a new parent
       * @dev: the pointer to the struct device to be moved
       * @new_parent: the new parent of the device (can be NULL)
       * @dpm_order: how to reorder the dpm_list
       */
      int device_move(struct device *dev, struct device *new_parent,
                      enum dpm_order dpm_order)
      {
              int error;
              struct device *old_parent;
              struct kobject *new_parent_kobj;
      
              dev = get_device(dev);
              if (!dev)
                      return -EINVAL;
      
              device_pm_lock();
              new_parent = get_device(new_parent);
              new_parent_kobj = get_device_parent(dev, new_parent);
              if (IS_ERR(new_parent_kobj)) {
                      error = PTR_ERR(new_parent_kobj);
                      put_device(new_parent);
                      goto out;
              }
      
              pr_debug("device: '%s': %s: moving to '%s'\n", dev_name(dev),
                       __func__, new_parent ? dev_name(new_parent) : "<NULL>");
              error = kobject_move(&dev->kobj, new_parent_kobj);
              if (error) {
                      cleanup_glue_dir(dev, new_parent_kobj);
                      put_device(new_parent);
                      goto out;
              }
              old_parent = dev->parent;
              dev->parent = new_parent;
              if (old_parent)
                      klist_remove(&dev->p->knode_parent);
              if (new_parent) {
                      klist_add_tail(&dev->p->knode_parent,
                                     &new_parent->p->klist_children);
                      set_dev_node(dev, dev_to_node(new_parent));
              }
      
              if (dev->class) {
                      error = device_move_class_links(dev, old_parent, new_parent);
                      if (error) {
                              /* We ignore errors on cleanup since we're hosed anyway... */
                              device_move_class_links(dev, new_parent, old_parent);
                              if (!kobject_move(&dev->kobj, &old_parent->kobj)) {
                                      if (new_parent)
                                              klist_remove(&dev->p->knode_parent);
                                      dev->parent = old_parent;
                                      if (old_parent) {
                                              klist_add_tail(&dev->p->knode_parent,
                                                             &old_parent->p->klist_children);
                                              set_dev_node(dev, dev_to_node(old_parent));
                                      }
                              }
                              cleanup_glue_dir(dev, new_parent_kobj);
                              put_device(new_parent);
                              goto out;
                      }
              }
              switch (dpm_order) {
              case DPM_ORDER_NONE:
                      break;
              case DPM_ORDER_DEV_AFTER_PARENT:
                      device_pm_move_after(dev, new_parent);
                      devices_kset_move_after(dev, new_parent);
                      break;
              case DPM_ORDER_PARENT_BEFORE_DEV:
                      device_pm_move_before(new_parent, dev);
                      devices_kset_move_before(new_parent, dev);
                      break;
              case DPM_ORDER_DEV_LAST:
                      device_pm_move_last(dev);
                      devices_kset_move_last(dev);
                      break;
              }
      
              put_device(old_parent);
      out:
              device_pm_unlock();
              put_device(dev);
              return error;
      }
      EXPORT_SYMBOL_GPL(device_move);
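
/*
 * Illustrative sketch (not compiled): reparenting a device, keeping the PM
 * list order consistent with the new topology.  "waldo_reparent" is a
 * hypothetical helper.
 */
#if 0
static int waldo_reparent(struct device *dev, struct device *new_parent)
{
	/* new_parent may be NULL; the device then moves under its default parent */
	return device_move(dev, new_parent, DPM_ORDER_DEV_AFTER_PARENT);
}
#endif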
      
      static int device_attrs_change_owner(struct device *dev, kuid_t kuid,
                                           kgid_t kgid)
      {
              struct kobject *kobj = &dev->kobj;
              struct class *class = dev->class;
              const struct device_type *type = dev->type;
              int error;
      
              if (class) {
                      /*
                       * Change the device groups of the device class for @dev to
                       * @kuid/@kgid.
                       */
                      error = sysfs_groups_change_owner(kobj, class->dev_groups, kuid,
                                                        kgid);
                      if (error)
                              return error;
              }
      
              if (type) {
                      /*
                       * Change the device groups of the device type for @dev to
                       * @kuid/@kgid.
                       */
                      error = sysfs_groups_change_owner(kobj, type->groups, kuid,
                                                        kgid);
                      if (error)
                              return error;
              }
      
              /* Change the device groups of @dev to @kuid/@kgid. */
              error = sysfs_groups_change_owner(kobj, dev->groups, kuid, kgid);
              if (error)
                      return error;
      
              if (device_supports_offline(dev) && !dev->offline_disabled) {
                      /* Change online device attributes of @dev to @kuid/@kgid. */
                      error = sysfs_file_change_owner(kobj, dev_attr_online.attr.name,
                                                      kuid, kgid);
                      if (error)
                              return error;
              }
      
              return 0;
      }
      
      /**
       * device_change_owner - change the owner of an existing device.
       * @dev: device.
       * @kuid: new owner's kuid
       * @kgid: new owner's kgid
       *
       * This changes the owner of @dev and its corresponding sysfs entries to
       * @kuid/@kgid. This function closely mirrors how @dev was added via driver
       * core.
       *
       * Returns 0 on success or error code on failure.
       */
      int device_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid)
      {
              int error;
              struct kobject *kobj = &dev->kobj;
      
              dev = get_device(dev);
              if (!dev)
                      return -EINVAL;
      
              /*
               * Change the kobject and the default attributes and groups of the
               * ktype associated with it to @kuid/@kgid.
               */
              error = sysfs_change_owner(kobj, kuid, kgid);
              if (error)
                      goto out;
      
              /*
               * Change the uevent file for @dev to the new owner. The uevent file
               * was created in a separate step when @dev got added and we mirror
               * that step here.
               */
              error = sysfs_file_change_owner(kobj, dev_attr_uevent.attr.name, kuid,
                                              kgid);
              if (error)
                      goto out;
      
              /*
               * Change the device groups, the device groups associated with the
               * device class, and the groups associated with the device type of @dev
               * to @kuid/@kgid.
               */
              error = device_attrs_change_owner(dev, kuid, kgid);
              if (error)
                      goto out;
      
              error = dpm_sysfs_change_owner(dev, kuid, kgid);
              if (error)
                      goto out;
      
      #ifdef CONFIG_BLOCK
              if (sysfs_deprecated && dev->class == &block_class)
                      goto out;
      #endif
      
              /*
               * Change the owner of the symlink located in the class directory of
               * the device class associated with @dev which points to the actual
               * directory entry for @dev to @kuid/@kgid. This ensures that the
               * symlink shows the same permissions as its target.
               */
              error = sysfs_link_change_owner(&dev->class->p->subsys.kobj, &dev->kobj,
                                              dev_name(dev), kuid, kgid);
              if (error)
                      goto out;
      
      out:
              put_device(dev);
              return error;
      }
      EXPORT_SYMBOL_GPL(device_change_owner);
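
/*
 * Illustrative example (sketch, not taken from an in-tree caller): code that
 * re-homes @dev for a user namespace @ns could change the ownership of the
 * device's sysfs entries roughly like this:
 *
 *	kuid_t kuid = make_kuid(ns, 0);
 *	kgid_t kgid = make_kgid(ns, 0);
 *
 *	if (uid_valid(kuid) && gid_valid(kgid))
 *		error = device_change_owner(dev, kuid, kgid);
 *
 * Mapping and validating the ids is the caller's job; device_change_owner()
 * only rewrites the ownership of the existing sysfs entries.
 */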
      
      /**
 * device_shutdown - call ->shutdown() on each device to shut it down.
       */
      void device_shutdown(void)
      {
              struct device *dev, *parent;
      
              wait_for_device_probe();
              device_block_probing();
      
              cpufreq_suspend();
      
              spin_lock(&devices_kset->list_lock);
              /*
               * Walk the devices list backward, shutting down each in turn.
               * Beware that device unplug events may also start pulling
               * devices offline, even as the system is shutting down.
               */
              while (!list_empty(&devices_kset->list)) {
                      dev = list_entry(devices_kset->list.prev, struct device,
                                      kobj.entry);
      
                      /*
                 * Hold a reference on the device's parent to prevent
                 * it from being freed, because we are about to take
                 * the parent's lock.
                       */
                      parent = get_device(dev->parent);
                      get_device(dev);
                      /*
                       * Make sure the device is off the kset list, in the
                       * event that dev->*->shutdown() doesn't remove it.
                       */
                      list_del_init(&dev->kobj.entry);
                      spin_unlock(&devices_kset->list_lock);
      
                      /* hold lock to avoid race with probe/release */
                      if (parent)
                              device_lock(parent);
                      device_lock(dev);
      
                      /* Don't allow any more runtime suspends */
                      pm_runtime_get_noresume(dev);
                      pm_runtime_barrier(dev);
      
                      if (dev->class && dev->class->shutdown_pre) {
                              if (initcall_debug)
                                      dev_info(dev, "shutdown_pre\n");
                              dev->class->shutdown_pre(dev);
                      }
                      if (dev->bus && dev->bus->shutdown) {
                              if (initcall_debug)
                                      dev_info(dev, "shutdown\n");
                              dev->bus->shutdown(dev);
                      } else if (dev->driver && dev->driver->shutdown) {
                              if (initcall_debug)
                                      dev_info(dev, "shutdown\n");
                              dev->driver->shutdown(dev);
                      }
      
                      device_unlock(dev);
                      if (parent)
                              device_unlock(parent);
      
                      put_device(dev);
                      put_device(parent);
      
                      spin_lock(&devices_kset->list_lock);
              }
              spin_unlock(&devices_kset->list_lock);
      }
      
      /*
       * Device logging functions
       */
      
      #ifdef CONFIG_PRINTK
      static void
      set_dev_info(const struct device *dev, struct dev_printk_info *dev_info)
      {
              const char *subsys;
      
        memset(dev_info, 0, sizeof(*dev_info));

        if (dev->class)
                subsys = dev->class->name;
        else if (dev->bus)
                subsys = dev->bus->name;
        else
                return;

        strscpy(dev_info->subsystem, subsys, sizeof(dev_info->subsystem));
      
              /*
               * Add device identifier DEVICE=:
               *   b12:8         block dev_t
               *   c127:3        char dev_t
               *   n8            netdev ifindex
               *   +sound:card0  subsystem:devname
               */
              if (MAJOR(dev->devt)) {
                      char c;
      
                if (strcmp(subsys, "block") == 0)
                        c = 'b';
                else
                        c = 'c';

                snprintf(dev_info->device, sizeof(dev_info->device),
                               "%c%u:%u", c, MAJOR(dev->devt), MINOR(dev->devt));
              } else if (strcmp(subsys, "net") == 0) {
                      struct net_device *net = to_net_dev(dev);
      
                      snprintf(dev_info->device, sizeof(dev_info->device),
                               "n%u", net->ifindex);
              } else {
                      snprintf(dev_info->device, sizeof(dev_info->device),
                               "+%s:%s", subsys, dev_name(dev));
              }
      }
      
      int dev_vprintk_emit(int level, const struct device *dev,
                           const char *fmt, va_list args)
      {
              struct dev_printk_info dev_info;
      
        set_dev_info(dev, &dev_info);

        return vprintk_emit(0, level, &dev_info, fmt, args);
      }
      EXPORT_SYMBOL(dev_vprintk_emit);
      
      int dev_printk_emit(int level, const struct device *dev, const char *fmt, ...)
      {
              va_list args;
              int r;
      
        va_start(args, fmt);
      
              r = dev_vprintk_emit(level, dev, fmt, args);
      
              va_end(args);
      
              return r;
      }
      EXPORT_SYMBOL(dev_printk_emit);
      
      static void __dev_printk(const char *level, const struct device *dev,
                              struct va_format *vaf)
      {
        if (dev)
                dev_printk_emit(level[1] - '0', dev, "%s %s: %pV",
                                      dev_driver_string(dev), dev_name(dev), vaf);
              else
                      printk("%s(NULL device *): %pV", level, vaf);
      }
      
      void dev_printk(const char *level, const struct device *dev,
                      const char *fmt, ...)
      {
              struct va_format vaf;
              va_list args;
      
              va_start(args, fmt);
      
              vaf.fmt = fmt;
              vaf.va = &args;
      
              __dev_printk(level, dev, &vaf);
      
              va_end(args);
      }
      EXPORT_SYMBOL(dev_printk);
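
/*
 * Illustrative usage (hypothetical driver messages): drivers normally reach
 * __dev_printk() through the dev_<level>() wrappers generated below, e.g.
 *
 *	dev_info(dev, "firmware version %u.%u\n", major, minor);
 *	dev_err(dev, "DMA setup failed: %d\n", ret);
 *
 * which prefix each message with the driver and device name and attach the
 * structured device information assembled by set_dev_info().
 */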
      
      #define define_dev_printk_level(func, kern_level)                \
      void func(const struct device *dev, const char *fmt, ...)        \
      {                                                                \
              struct va_format vaf;                                        \
              va_list args;                                                \
                                                                      \
              va_start(args, fmt);                                        \
                                                                      \
              vaf.fmt = fmt;                                                \
              vaf.va = &args;                                                \
                                                                      \
              __dev_printk(kern_level, dev, &vaf);                        \
                                                                      \
              va_end(args);                                                \
      }                                                                \
      EXPORT_SYMBOL(func);
      
      define_dev_printk_level(_dev_emerg, KERN_EMERG);
      define_dev_printk_level(_dev_alert, KERN_ALERT);
      define_dev_printk_level(_dev_crit, KERN_CRIT);
define_dev_printk_level(_dev_err, KERN_ERR);
      define_dev_printk_level(_dev_warn, KERN_WARNING);
      define_dev_printk_level(_dev_notice, KERN_NOTICE);
define_dev_printk_level(_dev_info, KERN_INFO);
      
      #endif
      
      /**
       * dev_err_probe - probe error check and log helper
       * @dev: the pointer to the struct device
       * @err: error value to test
       * @fmt: printf-style format string
       * @...: arguments as specified in the format string
       *
 * This helper implements a common pattern found in probe functions for error
 * checking: print a debug or error message depending on whether the error
 * value is -EPROBE_DEFER, and propagate the error upwards.
 * In the case of -EPROBE_DEFER it also sets the deferred probe reason, which
 * can be checked later by reading the devices_deferred debugfs attribute.
       * It replaces code sequence::
       *
       *         if (err != -EPROBE_DEFER)
       *                 dev_err(dev, ...);
       *         else
       *                 dev_dbg(dev, ...);
       *         return err;
       *
       * with::
       *
       *         return dev_err_probe(dev, err, ...);
       *
       * Returns @err.
       *
       */
      int dev_err_probe(const struct device *dev, int err, const char *fmt, ...)
      {
              struct va_format vaf;
              va_list args;
      
              va_start(args, fmt);
              vaf.fmt = fmt;
              vaf.va = &args;
      
              if (err != -EPROBE_DEFER) {
                      dev_err(dev, "error %pe: %pV", ERR_PTR(err), &vaf);
              } else {
                      device_set_deferred_probe_reason(dev, &vaf);
                      dev_dbg(dev, "error %pe: %pV", ERR_PTR(err), &vaf);
              }
      
              va_end(args);
      
              return err;
      }
      EXPORT_SYMBOL_GPL(dev_err_probe);
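
/*
 * Illustrative example (hypothetical driver code, not part of this file):
 *
 *	supply = devm_regulator_get(dev, "vdd");
 *	if (IS_ERR(supply))
 *		return dev_err_probe(dev, PTR_ERR(supply),
 *				     "failed to get vdd supply\n");
 *
 * A deferred probe is logged at debug level and recorded so it shows up in
 * the devices_deferred debugfs attribute; any other error is logged with
 * dev_err() before being propagated.
 */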
      
      static inline bool fwnode_is_primary(struct fwnode_handle *fwnode)
      {
              return fwnode && !IS_ERR(fwnode->secondary);
      }
      
      /**
       * set_primary_fwnode - Change the primary firmware node of a given device.
       * @dev: Device to handle.
       * @fwnode: New primary firmware node of the device.
       *
       * Set the device's firmware node pointer to @fwnode, but if a secondary
       * firmware node of the device is present, preserve it.
       *
       * Valid fwnode cases are:
       *  - primary --> secondary --> -ENODEV
       *  - primary --> NULL
       *  - secondary --> -ENODEV
       *  - NULL
       */
      void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode)
      {
              struct device *parent = dev->parent;
              struct fwnode_handle *fn = dev->fwnode;
      
              if (fwnode) {
                      if (fwnode_is_primary(fn))
                              fn = fn->secondary;
      
                      if (fn) {
                              WARN_ON(fwnode->secondary);
                              fwnode->secondary = fn;
                      }
                      dev->fwnode = fwnode;
              } else {
                      if (fwnode_is_primary(fn)) {
                              dev->fwnode = fn->secondary;
                              /* Set fn->secondary = NULL, so fn remains the primary fwnode */
                              if (!(parent && fn == parent->fwnode))
                                      fn->secondary = NULL;
                      } else {
                              dev->fwnode = NULL;
                      }
              }
      }
      EXPORT_SYMBOL_GPL(set_primary_fwnode);
      
      /**
       * set_secondary_fwnode - Change the secondary firmware node of a given device.
       * @dev: Device to handle.
       * @fwnode: New secondary firmware node of the device.
       *
       * If a primary firmware node of the device is present, set its secondary
       * pointer to @fwnode.  Otherwise, set the device's firmware node pointer to
       * @fwnode.
       */
      void set_secondary_fwnode(struct device *dev, struct fwnode_handle *fwnode)
      {
              if (fwnode)
                      fwnode->secondary = ERR_PTR(-ENODEV);
      
              if (fwnode_is_primary(dev->fwnode))
                      dev->fwnode->secondary = fwnode;
              else
                      dev->fwnode = fwnode;
      }
      EXPORT_SYMBOL_GPL(set_secondary_fwnode);
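
/*
 * Illustrative sketch (hypothetical caller): platform code that wants to hang
 * extra properties off a device which may already have a primary firmware
 * node (for example an ACPI companion) can supply them as a secondary node:
 *
 *	set_secondary_fwnode(dev, swnode_fwnode);
 *
 * where @swnode_fwnode is a firmware node the caller obtained elsewhere, e.g.
 * from a software node.  The primary node, if present, is preserved and the
 * new node becomes reachable through its ->secondary pointer.
 */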
      
      /**
       * device_set_of_node_from_dev - reuse device-tree node of another device
       * @dev: device whose device-tree node is being set
       * @dev2: device whose device-tree node is being reused
       *
       * Takes another reference to the new device-tree node after first dropping
       * any reference held to the old node.
       */
      void device_set_of_node_from_dev(struct device *dev, const struct device *dev2)
      {
              of_node_put(dev->of_node);
              dev->of_node = of_node_get(dev2->of_node);
              dev->of_node_reused = true;
      }
      EXPORT_SYMBOL_GPL(device_set_of_node_from_dev);
      
      int device_match_name(struct device *dev, const void *name)
      {
              return sysfs_streq(dev_name(dev), name);
      }
      EXPORT_SYMBOL_GPL(device_match_name);
      
      int device_match_of_node(struct device *dev, const void *np)
      {
              return dev->of_node == np;
      }
      EXPORT_SYMBOL_GPL(device_match_of_node);
      
      int device_match_fwnode(struct device *dev, const void *fwnode)
      {
              return dev_fwnode(dev) == fwnode;
      }
      EXPORT_SYMBOL_GPL(device_match_fwnode);
      
      int device_match_devt(struct device *dev, const void *pdevt)
      {
              return dev->devt == *(dev_t *)pdevt;
      }
      EXPORT_SYMBOL_GPL(device_match_devt);
      
      int device_match_acpi_dev(struct device *dev, const void *adev)
      {
              return ACPI_COMPANION(dev) == adev;
      }
      EXPORT_SYMBOL(device_match_acpi_dev);
      
      int device_match_any(struct device *dev, const void *unused)
      {
              return 1;
      }
      EXPORT_SYMBOL_GPL(device_match_any);
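
/*
 * Illustrative use of the device_match_*() helpers (hypothetical device
 * name): they are intended to be passed as the match callback of the
 * *_find_device() iterators, e.g.
 *
 *	dev = bus_find_device(&platform_bus_type, NULL, "soc-thermal",
 *			      device_match_name);
 *	if (dev)
 *		put_device(dev);
 *
 * bus_find_device() returns the device with a reference held, so the caller
 * must drop it with put_device() when done.
 */
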
      // SPDX-License-Identifier: GPL-2.0+
      /*
       * XArray implementation
       * Copyright (c) 2017-2018 Microsoft Corporation
       * Copyright (c) 2018-2020 Oracle
       * Author: Matthew Wilcox <willy@infradead.org>
       */
      
      #include <linux/bitmap.h>
      #include <linux/export.h>
      #include <linux/list.h>
      #include <linux/slab.h>
      #include <linux/xarray.h>
      
      /*
       * Coding conventions in this file:
       *
       * @xa is used to refer to the entire xarray.
       * @xas is the 'xarray operation state'.  It may be either a pointer to
       * an xa_state, or an xa_state stored on the stack.  This is an unfortunate
       * ambiguity.
       * @index is the index of the entry being operated on
       * @mark is an xa_mark_t; a small number indicating one of the mark bits.
       * @node refers to an xa_node; usually the primary one being operated on by
       * this function.
       * @offset is the index into the slots array inside an xa_node.
       * @parent refers to the @xa_node closer to the head than @node.
       * @entry refers to something stored in a slot in the xarray
       */
      
      static inline unsigned int xa_lock_type(const struct xarray *xa)
      {
              return (__force unsigned int)xa->xa_flags & 3;
      }
      
      static inline void xas_lock_type(struct xa_state *xas, unsigned int lock_type)
      {
              if (lock_type == XA_LOCK_IRQ)
                      xas_lock_irq(xas);
              else if (lock_type == XA_LOCK_BH)
                      xas_lock_bh(xas);
              else
                      xas_lock(xas);
      }
      
      static inline void xas_unlock_type(struct xa_state *xas, unsigned int lock_type)
      {
              if (lock_type == XA_LOCK_IRQ)
                      xas_unlock_irq(xas);
              else if (lock_type == XA_LOCK_BH)
                      xas_unlock_bh(xas);
              else
                      xas_unlock(xas);
      }
      
      static inline bool xa_track_free(const struct xarray *xa)
      {
        return xa->xa_flags & XA_FLAGS_TRACK_FREE;
      }
      
      static inline bool xa_zero_busy(const struct xarray *xa)
      {
        return xa->xa_flags & XA_FLAGS_ZERO_BUSY;
      }
      
      static inline void xa_mark_set(struct xarray *xa, xa_mark_t mark)
      {
              if (!(xa->xa_flags & XA_FLAGS_MARK(mark)))
                      xa->xa_flags |= XA_FLAGS_MARK(mark);
      }
      
      static inline void xa_mark_clear(struct xarray *xa, xa_mark_t mark)
      {
              if (xa->xa_flags & XA_FLAGS_MARK(mark))
                      xa->xa_flags &= ~(XA_FLAGS_MARK(mark));
      }
      
      static inline unsigned long *node_marks(struct xa_node *node, xa_mark_t mark)
      {
        return node->marks[(__force unsigned)mark];
      }
      
      static inline bool node_get_mark(struct xa_node *node,
                      unsigned int offset, xa_mark_t mark)
      {
              return test_bit(offset, node_marks(node, mark));
      }
      
      /* returns true if the bit was set */
      static inline bool node_set_mark(struct xa_node *node, unsigned int offset,
                                      xa_mark_t mark)
      {
              return __test_and_set_bit(offset, node_marks(node, mark));
      }
      
      /* returns true if the bit was set */
      static inline bool node_clear_mark(struct xa_node *node, unsigned int offset,
                                      xa_mark_t mark)
      {
        return __test_and_clear_bit(offset, node_marks(node, mark));
      }
      
      static inline bool node_any_mark(struct xa_node *node, xa_mark_t mark)
      {
              return !bitmap_empty(node_marks(node, mark), XA_CHUNK_SIZE);
      }
      
      static inline void node_mark_all(struct xa_node *node, xa_mark_t mark)
      {
              bitmap_fill(node_marks(node, mark), XA_CHUNK_SIZE);
      }
      
      #define mark_inc(mark) do { \
              mark = (__force xa_mark_t)((__force unsigned)(mark) + 1); \
      } while (0)
      
      /*
       * xas_squash_marks() - Merge all marks to the first entry
       * @xas: Array operation state.
       *
       * Set a mark on the first entry if any entry has it set.  Clear marks on
       * all sibling entries.
       */
      static void xas_squash_marks(const struct xa_state *xas)
      {
              unsigned int mark = 0;
              unsigned int limit = xas->xa_offset + xas->xa_sibs + 1;
      
              if (!xas->xa_sibs)
                      return;
      
              do {
                      unsigned long *marks = xas->xa_node->marks[mark];
                      if (find_next_bit(marks, limit, xas->xa_offset + 1) == limit)
                              continue;
                      __set_bit(xas->xa_offset, marks);
                      bitmap_clear(marks, xas->xa_offset + 1, xas->xa_sibs);
              } while (mark++ != (__force unsigned)XA_MARK_MAX);
      }
      
      /* extracts the offset within this node from the index */
      static unsigned int get_offset(unsigned long index, struct xa_node *node)
      {
        return (index >> node->shift) & XA_CHUNK_MASK;
      }
      
      static void xas_set_offset(struct xa_state *xas)
      {
        xas->xa_offset = get_offset(xas->xa_index, xas->xa_node);
      }
      
      /* move the index either forwards (find) or backwards (sibling slot) */
      static void xas_move_index(struct xa_state *xas, unsigned long offset)
      {
        unsigned int shift = xas->xa_node->shift;
        xas->xa_index &= ~XA_CHUNK_MASK << shift;
        xas->xa_index += offset << shift;
      }
      
      static void xas_advance(struct xa_state *xas)
      {
        xas->xa_offset++;
              xas_move_index(xas, xas->xa_offset);
      }
      
      static void *set_bounds(struct xa_state *xas)
      {
        xas->xa_node = XAS_BOUNDS;
              return NULL;
      }
      
      /*
       * Starts a walk.  If the @xas is already valid, we assume that it's on
       * the right path and just return where we've got to.  If we're in an
       * error state, return NULL.  If the index is outside the current scope
       * of the xarray, return NULL without changing @xas->xa_node.  Otherwise
       * set @xas->xa_node to NULL and return the current head of the array.
       */
      static void *xas_start(struct xa_state *xas)
      {
              void *entry;
      
        if (xas_valid(xas))
                return xas_reload(xas);
        if (xas_error(xas))
                return NULL;

        entry = xa_head(xas->xa);
        if (!xa_is_node(entry)) {
                if (xas->xa_index)
                        return set_bounds(xas);
        } else {
                if ((xas->xa_index >> xa_to_node(entry)->shift) > XA_CHUNK_MASK)
                        return set_bounds(xas);
        }

        xas->xa_node = NULL;
        return entry;
      }
      
      static void *xas_descend(struct xa_state *xas, struct xa_node *node)
      {
        unsigned int offset = get_offset(xas->xa_index, node);
        void *entry = xa_entry(xas->xa, node, offset);

        xas->xa_node = node;
              if (xa_is_sibling(entry)) {
                      offset = xa_to_sibling(entry);
                      entry = xa_entry(xas->xa, node, offset);
              }
      
              xas->xa_offset = offset;
              return entry;
      }
      
      /**
       * xas_load() - Load an entry from the XArray (advanced).
       * @xas: XArray operation state.
       *
       * Usually walks the @xas to the appropriate state to load the entry
       * stored at xa_index.  However, it will do nothing and return %NULL if
       * @xas is in an error state.  xas_load() will never expand the tree.
       *
       * If the xa_state is set up to operate on a multi-index entry, xas_load()
       * may return %NULL or an internal entry, even if there are entries
       * present within the range specified by @xas.
       *
       * Context: Any context.  The caller should hold the xa_lock or the RCU lock.
       * Return: Usually an entry in the XArray, but see description for exceptions.
       */
      void *xas_load(struct xa_state *xas)
      {
        void *entry = xas_start(xas);

        while (xa_is_node(entry)) {
                struct xa_node *node = xa_to_node(entry);

                if (xas->xa_shift > node->shift)
                        break;
                entry = xas_descend(xas, node);
                if (node->shift == 0)
                        break;
        }
        return entry;
      }
      EXPORT_SYMBOL_GPL(xas_load);
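
/*
 * Illustrative example (sketch; @my_xa and @index are the caller's): a lookup
 * under the RCU read lock using the advanced API looks like
 *
 *	XA_STATE(xas, &my_xa, index);
 *	void *entry;
 *
 *	rcu_read_lock();
 *	entry = xas_load(&xas);
 *	rcu_read_unlock();
 *
 * Most users should call xa_load() instead, which wraps this pattern and also
 * retries when it encounters an XA_RETRY_ENTRY.
 */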
      
      /* Move the radix tree node cache here */
      extern struct kmem_cache *radix_tree_node_cachep;
      extern void radix_tree_node_rcu_free(struct rcu_head *head);
      
      #define XA_RCU_FREE        ((struct xarray *)1)
      
      static void xa_node_free(struct xa_node *node)
      {
              XA_NODE_BUG_ON(node, !list_empty(&node->private_list));
              node->array = XA_RCU_FREE;
              call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
      }
      
      /*
       * xas_destroy() - Free any resources allocated during the XArray operation.
       * @xas: XArray operation state.
       *
       * This function is now internal-only.
       */
      static void xas_destroy(struct xa_state *xas)
      {
        struct xa_node *next, *node = xas->xa_alloc;
      
              while (node) {
                      XA_NODE_BUG_ON(node, !list_empty(&node->private_list));
                      next = rcu_dereference_raw(node->parent);
                      radix_tree_node_rcu_free(&node->rcu_head);
                      xas->xa_alloc = node = next;
              }
      }
      
      /**
       * xas_nomem() - Allocate memory if needed.
       * @xas: XArray operation state.
       * @gfp: Memory allocation flags.
       *
       * If we need to add new nodes to the XArray, we try to allocate memory
       * with GFP_NOWAIT while holding the lock, which will usually succeed.
       * If it fails, @xas is flagged as needing memory to continue.  The caller
       * should drop the lock and call xas_nomem().  If xas_nomem() succeeds,
       * the caller should retry the operation.
       *
       * Forward progress is guaranteed as one node is allocated here and
       * stored in the xa_state where it will be found by xas_alloc().  More
       * nodes will likely be found in the slab allocator, but we do not tie
       * them up here.
       *
       * Return: true if memory was needed, and was successfully allocated.
       */
bool xas_nomem(struct xa_state *xas, gfp_t gfp)
{
        if (xas->xa_node != XA_ERROR(-ENOMEM)) {
                xas_destroy(xas);
                      return false;
              }
              if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT)
                      gfp |= __GFP_ACCOUNT;
              xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp);
              if (!xas->xa_alloc)
                      return false;
              xas->xa_alloc->parent = NULL;
              XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list));
              xas->xa_node = XAS_RESTART;
        return true;
      }
      EXPORT_SYMBOL_GPL(xas_nomem);
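
/*
 * Illustrative example of the retry pattern described above (sketch; @xa,
 * @index and @item are the caller's):
 *
 *	XA_STATE(xas, xa, index);
 *
 *	do {
 *		xas_lock(&xas);
 *		xas_store(&xas, item);
 *		xas_unlock(&xas);
 *	} while (xas_nomem(&xas, GFP_KERNEL));
 *
 *	return xas_error(&xas);
 *
 * If the GFP_NOWAIT allocation inside xas_store() fails, xas_nomem()
 * allocates a node with the caller's gfp flags outside the lock and asks the
 * caller to retry; otherwise it frees any unused preallocation and returns
 * false.
 */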
      
      /*
       * __xas_nomem() - Drop locks and allocate memory if needed.
       * @xas: XArray operation state.
       * @gfp: Memory allocation flags.
       *
       * Internal variant of xas_nomem().
       *
       * Return: true if memory was needed, and was successfully allocated.
       */
      static bool __xas_nomem(struct xa_state *xas, gfp_t gfp)
              __must_hold(xas->xa->xa_lock)
      {
              unsigned int lock_type = xa_lock_type(xas->xa);
      
              if (xas->xa_node != XA_ERROR(-ENOMEM)) {
                      xas_destroy(xas);
                      return false;
              }
              if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT)
                      gfp |= __GFP_ACCOUNT;
              if (gfpflags_allow_blocking(gfp)) {
                      xas_unlock_type(xas, lock_type);
                      xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp);
                      xas_lock_type(xas, lock_type);
              } else {
                      xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp);
              }
              if (!xas->xa_alloc)
                      return false;
              xas->xa_alloc->parent = NULL;
              XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list));
              xas->xa_node = XAS_RESTART;
              return true;
      }
      
      static void xas_update(struct xa_state *xas, struct xa_node *node)
      {
              if (xas->xa_update)
                      xas->xa_update(node);
              else
                      XA_NODE_BUG_ON(node, !list_empty(&node->private_list));
      }
      
      static void *xas_alloc(struct xa_state *xas, unsigned int shift)
      {
        struct xa_node *parent = xas->xa_node;
              struct xa_node *node = xas->xa_alloc;
      
              if (xas_invalid(xas))
                      return NULL;
      
        if (node) {
                xas->xa_alloc = NULL;
        } else {
                gfp_t gfp = GFP_NOWAIT | __GFP_NOWARN;

                if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT)
                        gfp |= __GFP_ACCOUNT;

                node = kmem_cache_alloc(radix_tree_node_cachep, gfp);
                      if (!node) {
                              xas_set_err(xas, -ENOMEM);
                              return NULL;
                      }
              }
      
        if (parent) {
                node->offset = xas->xa_offset;
                parent->count++;
                XA_NODE_BUG_ON(node, parent->count > XA_CHUNK_SIZE);
                xas_update(xas, parent);
        }
        XA_NODE_BUG_ON(node, shift > BITS_PER_LONG);
        XA_NODE_BUG_ON(node, !list_empty(&node->private_list));
        node->shift = shift;
              node->count = 0;
              node->nr_values = 0;
              RCU_INIT_POINTER(node->parent, xas->xa_node);
              node->array = xas->xa;
      
        return node;
      }
      
      #ifdef CONFIG_XARRAY_MULTI
      /* Returns the number of indices covered by a given xa_state */
      static unsigned long xas_size(const struct xa_state *xas)
      {
              return (xas->xa_sibs + 1UL) << xas->xa_shift;
      }
      #endif
      
      /*
       * Use this to calculate the maximum index that will need to be created
       * in order to add the entry described by @xas.  Because we cannot store a
       * multi-index entry at index 0, the calculation is a little more complex
       * than you might expect.
       */
      static unsigned long xas_max(struct xa_state *xas)
      {
              unsigned long max = xas->xa_index;
      
      #ifdef CONFIG_XARRAY_MULTI
              if (xas->xa_shift || xas->xa_sibs) {
                      unsigned long mask = xas_size(xas) - 1;
                      max |= mask;
                      if (mask == max)
                              max++;
              }
      #endif
      
              return max;
      }
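
/*
 * Worked example (for illustration, assuming XA_CHUNK_SHIFT == 6): storing an
 * order-2 entry at index 0 gives xa_shift == 0 and xa_sibs == 3, so
 * mask == 3 and max == 0 | 3 == 3.  Because mask == max, max is bumped to 4,
 * which makes xas_expand() allocate a node rather than letting the
 * multi-index entry be stored directly in xa_head (which can only describe
 * index 0).
 */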
      
      /* The maximum index that can be contained in the array without expanding it */
      static unsigned long max_index(void *entry)
      {
        if (!xa_is_node(entry))
                return 0;
        return (XA_CHUNK_SIZE << xa_to_node(entry)->shift) - 1;
      }
      
      static void xas_shrink(struct xa_state *xas)
      {
        struct xarray *xa = xas->xa;
              struct xa_node *node = xas->xa_node;
      
              for (;;) {
                      void *entry;
      
                      XA_NODE_BUG_ON(node, node->count > XA_CHUNK_SIZE);
                      if (node->count != 1)
                              break;
                entry = xa_entry_locked(xa, node, 0);
                      if (!entry)
                              break;
                      if (!xa_is_node(entry) && node->shift)
                              break;
                      if (xa_is_zero(entry) && xa_zero_busy(xa))
                              entry = NULL;
                      xas->xa_node = XAS_BOUNDS;
      
                      RCU_INIT_POINTER(xa->xa_head, entry);
                      if (xa_track_free(xa) && !node_get_mark(node, 0, XA_FREE_MARK))
                              xa_mark_clear(xa, XA_FREE_MARK);
      
                      node->count = 0;
                      node->nr_values = 0;
                      if (!xa_is_node(entry))
                              RCU_INIT_POINTER(node->slots[0], XA_RETRY_ENTRY);
                      xas_update(xas, node);
                      xa_node_free(node);
                      if (!xa_is_node(entry))
                              break;
                      node = xa_to_node(entry);
                      node->parent = NULL;
              }
      }
      
      /*
       * xas_delete_node() - Attempt to delete an xa_node
       * @xas: Array operation state.
       *
 * Attempts to delete the @xas->xa_node.  This will fail if the node still
 * contains entries, i.e. has a non-zero ->count.
       */
      static void xas_delete_node(struct xa_state *xas)
      {
        struct xa_node *node = xas->xa_node;
      
              for (;;) {
                      struct xa_node *parent;
      
                      XA_NODE_BUG_ON(node, node->count > XA_CHUNK_SIZE);
                if (node->count)
                        break;

                parent = xa_parent_locked(xas->xa, node);
                xas->xa_node = parent;
                xas->xa_offset = node->offset;
                xa_node_free(node);

                if (!parent) {
                        xas->xa->xa_head = NULL;
                              xas->xa_node = XAS_BOUNDS;
                              return;
                      }
      
                parent->slots[xas->xa_offset] = NULL;
                      parent->count--;
                      XA_NODE_BUG_ON(parent, parent->count > XA_CHUNK_SIZE);
                      node = parent;
                      xas_update(xas, node);
              }
      
        if (!node->parent)
                xas_shrink(xas);
      }
      
      /**
       * xas_free_nodes() - Free this node and all nodes that it references
       * @xas: Array operation state.
       * @top: Node to free
       *
       * This node has been removed from the tree.  We must now free it and all
       * of its subnodes.  There may be RCU walkers with references into the tree,
       * so we must replace all entries with retry markers.
       */
      static void xas_free_nodes(struct xa_state *xas, struct xa_node *top)
      {
              unsigned int offset = 0;
              struct xa_node *node = top;
      
              for (;;) {
                      void *entry = xa_entry_locked(xas->xa, node, offset);
      
                      if (node->shift && xa_is_node(entry)) {
                              node = xa_to_node(entry);
                              offset = 0;
                              continue;
                      }
                      if (entry)
                              RCU_INIT_POINTER(node->slots[offset], XA_RETRY_ENTRY);
                      offset++;
                      while (offset == XA_CHUNK_SIZE) {
                              struct xa_node *parent;
      
                              parent = xa_parent_locked(xas->xa, node);
                              offset = node->offset + 1;
                              node->count = 0;
                              node->nr_values = 0;
                              xas_update(xas, node);
                              xa_node_free(node);
                              if (node == top)
                                      return;
                              node = parent;
                      }
              }
      }
      
      /*
       * xas_expand adds nodes to the head of the tree until it has reached
       * sufficient height to be able to contain @xas->xa_index
       */
      static int xas_expand(struct xa_state *xas, void *head)
      {
        struct xarray *xa = xas->xa;
              struct xa_node *node = NULL;
              unsigned int shift = 0;
              unsigned long max = xas_max(xas);
      
              if (!head) {
                if (max == 0)
                        return 0;
                while ((max >> shift) >= XA_CHUNK_SIZE)
                        shift += XA_CHUNK_SHIFT;
                return shift + XA_CHUNK_SHIFT;
        } else if (xa_is_node(head)) {
                node = xa_to_node(head);
                shift = node->shift + XA_CHUNK_SHIFT;
        }
        xas->xa_node = NULL;

        while (max > max_index(head)) {
                      xa_mark_t mark = 0;
      
                      XA_NODE_BUG_ON(node, shift > BITS_PER_LONG);
                node = xas_alloc(xas, shift);
                if (!node)
                        return -ENOMEM;

                node->count = 1;
                if (xa_is_value(head))
                        node->nr_values = 1;
                RCU_INIT_POINTER(node->slots[0], head);

                /* Propagate the aggregated mark info to the new child */
                for (;;) {
                        if (xa_track_free(xa) && mark == XA_FREE_MARK) {
                                      node_mark_all(node, XA_FREE_MARK);
                                      if (!xa_marked(xa, XA_FREE_MARK)) {
                                              node_clear_mark(node, 0, XA_FREE_MARK);
                                              xa_mark_set(xa, XA_FREE_MARK);
                                      }
                        } else if (xa_marked(xa, mark)) {
                                node_set_mark(node, 0, mark);
                        }
                        if (mark == XA_MARK_MAX)
                                break;
                        mark_inc(mark);
                      }
      
                      /*
                       * Now that the new node is fully initialised, we can add
                       * it to the tree
                       */
                if (xa_is_node(head)) {
                        xa_to_node(head)->offset = 0;
                        rcu_assign_pointer(xa_to_node(head)->parent, node);
                }
                head = xa_mk_node(node);
                rcu_assign_pointer(xa->xa_head, head);
                xas_update(xas, node);

                shift += XA_CHUNK_SHIFT;
        }

        xas->xa_node = node;
              return shift;
      }
      
      /*
       * xas_create() - Create a slot to store an entry in.
       * @xas: XArray operation state.
       * @allow_root: %true if we can store the entry in the root directly
       *
       * Most users will not need to call this function directly, as it is called
       * by xas_store().  It is useful for doing conditional store operations
       * (see the xa_cmpxchg() implementation for an example).
       *
       * Return: If the slot already existed, returns the contents of this slot.
       * If the slot was newly created, returns %NULL.  If it failed to create the
       * slot, returns %NULL and indicates the error in @xas.
       */
      static void *xas_create(struct xa_state *xas, bool allow_root)
      {
        struct xarray *xa = xas->xa;
        void *entry;
        void __rcu **slot;
        struct xa_node *node = xas->xa_node;
        int shift;
        unsigned int order = xas->xa_shift;
      
              if (xas_top(node)) {
                entry = xa_head_locked(xa);
                xas->xa_node = NULL;
                if (!entry && xa_zero_busy(xa))
                        entry = XA_ZERO_ENTRY;
                shift = xas_expand(xas, entry);
                if (shift < 0)
                        return NULL;
                if (!shift && !allow_root)
                        shift = XA_CHUNK_SHIFT;
                entry = xa_head_locked(xa);
                slot = &xa->xa_head;
        } else if (xas_error(xas)) {
                return NULL;
        } else if (node) {
                unsigned int offset = xas->xa_offset;

                shift = node->shift;
                entry = xa_entry_locked(xa, node, offset);
                      slot = &node->slots[offset];
              } else {
                      shift = 0;
                      entry = xa_head_locked(xa);
                      slot = &xa->xa_head;
              }
      
        while (shift > order) {
                shift -= XA_CHUNK_SHIFT;
                if (!entry) {
                        node = xas_alloc(xas, shift);
                        if (!node)
                                break;
                        if (xa_track_free(xa))
                                node_mark_all(node, XA_FREE_MARK);
                        rcu_assign_pointer(*slot, xa_mk_node(node));
                } else if (xa_is_node(entry)) {
                        node = xa_to_node(entry);
                } else {
                        break;
                }
                entry = xas_descend(xas, node);
                      slot = &node->slots[xas->xa_offset];
              }
      
              return entry;
      }
      
      /**
       * xas_create_range() - Ensure that stores to this range will succeed
       * @xas: XArray operation state.
       *
       * Creates all of the slots in the range covered by @xas.  Sets @xas to
       * create single-index entries and positions it at the beginning of the
       * range.  This is for the benefit of users which have not yet been
       * converted to use multi-index entries.
       */
      void xas_create_range(struct xa_state *xas)
      {
        unsigned long index = xas->xa_index;
        unsigned char shift = xas->xa_shift;
        unsigned char sibs = xas->xa_sibs;

        xas->xa_index |= ((sibs + 1UL) << shift) - 1;
        if (xas_is_node(xas) && xas->xa_node->shift == xas->xa_shift)
                xas->xa_offset |= sibs;
        xas->xa_shift = 0;
              xas->xa_sibs = 0;
      
              for (;;) {
                xas_create(xas, true);
                if (xas_error(xas))
                        goto restore;
                if (xas->xa_index <= (index | XA_CHUNK_MASK))
                              goto success;
                      xas->xa_index -= XA_CHUNK_SIZE;
      
                      for (;;) {
                              struct xa_node *node = xas->xa_node;
                              xas->xa_node = xa_parent_locked(xas->xa, node);
                              xas->xa_offset = node->offset - 1;
                              if (node->offset != 0)
                                      break;
                      }
              }
      
      restore:
              xas->xa_shift = shift;
              xas->xa_sibs = sibs;
              xas->xa_index = index;
              return;
      success:
        xas->xa_index = index;
        if (xas->xa_node)
                xas_set_offset(xas);
      }
      EXPORT_SYMBOL_GPL(xas_create_range);
      
      static void update_node(struct xa_state *xas, struct xa_node *node,
                      int count, int values)
      {
        if (!node || (!count && !values))
                return;

        node->count += count;
        node->nr_values += values;
        XA_NODE_BUG_ON(node, node->count > XA_CHUNK_SIZE);
        XA_NODE_BUG_ON(node, node->nr_values > XA_CHUNK_SIZE);
        xas_update(xas, node);
        if (count < 0)
                xas_delete_node(xas);
      }
      
      /**
       * xas_store() - Store this entry in the XArray.
       * @xas: XArray operation state.
       * @entry: New entry.
       *
       * If @xas is operating on a multi-index entry, the entry returned by this
       * function is essentially meaningless (it may be an internal entry or it
       * may be %NULL, even if there are non-NULL entries at some of the indices
       * covered by the range).  This is not a problem for any current users,
       * and can be changed if needed.
       *
       * Return: The old entry at this index.
       */
      void *xas_store(struct xa_state *xas, void *entry)
      {
              struct xa_node *node;
        void __rcu **slot = &xas->xa->xa_head;
        unsigned int offset, max;
        int count = 0;
        int values = 0;
        void *first, *next;
        bool value = xa_is_value(entry);

        if (entry) {
                bool allow_root = !xa_is_node(entry) && !xa_is_zero(entry);
                first = xas_create(xas, allow_root);
        } else {
                first = xas_load(xas);
              }
      
        if (xas_invalid(xas))
                return first;
        node = xas->xa_node;
        if (node && (xas->xa_shift < node->shift))
                xas->xa_sibs = 0;
        if ((first == entry) && !xas->xa_sibs)
                return first;

        next = first;
        offset = xas->xa_offset;
        max = xas->xa_offset + xas->xa_sibs;
        if (node) {
                slot = &node->slots[offset];
                if (xas->xa_sibs)
                        xas_squash_marks(xas);
        }
        if (!entry)
                xas_init_marks(xas);
      
              for (;;) {
                      /*
                       * Must clear the marks before setting the entry to NULL,
                       * otherwise xas_for_each_marked may find a NULL entry and
                       * stop early.  rcu_assign_pointer contains a release barrier
                       * so the mark clearing will appear to happen before the
                       * entry is set to NULL.
                       */
                rcu_assign_pointer(*slot, entry);
                if (xa_is_node(next) && (!node || node->shift))
                        xas_free_nodes(xas, xa_to_node(next));
                if (!node)
                        break;
                count += !next - !entry;
                values += !xa_is_value(first) - !value;
                if (entry) {
                        if (offset == max)
                                      break;
                              if (!xa_is_sibling(entry))
                                      entry = xa_mk_sibling(xas->xa_offset);
                      } else {
                        if (offset == XA_CHUNK_MASK)
                                break;
                }
                next = xa_entry_locked(xas->xa, node, ++offset);
                if (!xa_is_sibling(next)) {
                        if (!entry && (offset > max))
                                      break;
                              first = next;
                      }
                      slot++;
              }
      
        update_node(xas, node, count, values);
              return first;
      }
      EXPORT_SYMBOL_GPL(xas_store);
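
/*
 * Illustrative example (sketch; @xa, @index and @new_entry are the caller's):
 * replacing an entry and inspecting the old value, under the xa_lock:
 *
 *	XA_STATE(xas, xa, index);
 *	void *old;
 *
 *	xas_lock(&xas);
 *	old = xas_store(&xas, new_entry);
 *	xas_unlock(&xas);
 *
 * Storing NULL deletes the entry and, through update_node() and
 * xas_delete_node(), frees any nodes that become empty as a result.  Most
 * callers will want the normal API wrappers xa_store() and xa_erase().
 */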
      
      /**
       * xas_get_mark() - Returns the state of this mark.
       * @xas: XArray operation state.
       * @mark: Mark number.
       *
       * Return: true if the mark is set, false if the mark is clear or @xas
       * is in an error state.
       */
      bool xas_get_mark(const struct xa_state *xas, xa_mark_t mark)
      {
              if (xas_invalid(xas))
                      return false;
              if (!xas->xa_node)
                      return xa_marked(xas->xa, mark);
              return node_get_mark(xas->xa_node, xas->xa_offset, mark);
      }
      EXPORT_SYMBOL_GPL(xas_get_mark);
      
      /**
       * xas_set_mark() - Sets the mark on this entry and its parents.
       * @xas: XArray operation state.
       * @mark: Mark number.
       *
       * Sets the specified mark on this entry, and walks up the tree setting it
       * on all the ancestor entries.  Does nothing if @xas has not been walked to
       * an entry, or is in an error state.
       */
      void xas_set_mark(const struct xa_state *xas, xa_mark_t mark)
      {
              struct xa_node *node = xas->xa_node;
              unsigned int offset = xas->xa_offset;
      
              if (xas_invalid(xas))
                      return;
      
              while (node) {
                      if (node_set_mark(node, offset, mark))
                              return;
                      offset = node->offset;
                      node = xa_parent_locked(xas->xa, node);
              }
      
              if (!xa_marked(xas->xa, mark))
                      xa_mark_set(xas->xa, mark);
      }
      EXPORT_SYMBOL_GPL(xas_set_mark);
      
      /**
       * xas_clear_mark() - Clears the mark on this entry and its parents.
       * @xas: XArray operation state.
       * @mark: Mark number.
       *
       * Clears the specified mark on this entry, and walks back to the head
       * attempting to clear it on all the ancestor entries.  Does nothing if
       * @xas has not been walked to an entry, or is in an error state.
       */
      void xas_clear_mark(const struct xa_state *xas, xa_mark_t mark)
      {
        struct xa_node *node = xas->xa_node;
        unsigned int offset = xas->xa_offset;

        if (xas_invalid(xas))
                return;

        while (node) {
                if (!node_clear_mark(node, offset, mark))
                              return;
                      if (node_any_mark(node, mark))
                              return;
      
                      offset = node->offset;
                      node = xa_parent_locked(xas->xa, node);
              }
      
        if (xa_marked(xas->xa, mark))
                      xa_mark_clear(xas->xa, mark);
      }
      EXPORT_SYMBOL_GPL(xas_clear_mark);
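
/*
 * Illustrative example (sketch; @xa and @index are the caller's): tagging an
 * existing entry through the advanced API:
 *
 *	XA_STATE(xas, xa, index);
 *
 *	xas_lock(&xas);
 *	if (xas_load(&xas))
 *		xas_set_mark(&xas, XA_MARK_0);
 *	xas_unlock(&xas);
 *
 * The walk performed by xas_load() positions @xas on the entry so that
 * xas_set_mark() knows which node and offset to tag; the normal API
 * equivalents are xa_set_mark(), xa_clear_mark() and xa_get_mark().
 */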
      
      /**
       * xas_init_marks() - Initialise all marks for the entry
       * @xas: Array operations state.
       *
       * Initialise all marks for the entry specified by @xas.  If we're tracking
       * free entries with a mark, we need to set it on all entries.  All other
       * marks are cleared.
       *
       * This implementation is not as efficient as it could be; we may walk
       * up the tree multiple times.
       */
      void xas_init_marks(const struct xa_state *xas)
      {
              xa_mark_t mark = 0;
      
              for (;;) {
                if (xa_track_free(xas->xa) && mark == XA_FREE_MARK)
                        xas_set_mark(xas, mark);
                else
                        xas_clear_mark(xas, mark);
                if (mark == XA_MARK_MAX)
                        break;
                mark_inc(mark);
              }
      }
      EXPORT_SYMBOL_GPL(xas_init_marks);
      
      #ifdef CONFIG_XARRAY_MULTI
      static unsigned int node_get_marks(struct xa_node *node, unsigned int offset)
      {
              unsigned int marks = 0;
              xa_mark_t mark = XA_MARK_0;
      
              for (;;) {
                      if (node_get_mark(node, offset, mark))
                              marks |= 1 << (__force unsigned int)mark;
                      if (mark == XA_MARK_MAX)
                              break;
                      mark_inc(mark);
              }
      
              return marks;
      }
      
      static void node_set_marks(struct xa_node *node, unsigned int offset,
                              struct xa_node *child, unsigned int marks)
      {
              xa_mark_t mark = XA_MARK_0;
      
              for (;;) {
                      if (marks & (1 << (__force unsigned int)mark)) {
                              node_set_mark(node, offset, mark);
                              if (child)
                                      node_mark_all(child, mark);
                      }
                      if (mark == XA_MARK_MAX)
                              break;
                      mark_inc(mark);
              }
      }
      
      /**
       * xas_split_alloc() - Allocate memory for splitting an entry.
       * @xas: XArray operation state.
       * @entry: New entry which will be stored in the array.
       * @order: New entry order.
       * @gfp: Memory allocation flags.
       *
       * This function should be called before calling xas_split().
       * If necessary, it will allocate new nodes (and fill them with @entry)
       * to prepare for the upcoming split of an entry of @order size into
       * entries of the order stored in the @xas.
       *
       * Context: May sleep if @gfp flags permit.
       */
      void xas_split_alloc(struct xa_state *xas, void *entry, unsigned int order,
                      gfp_t gfp)
      {
              unsigned int sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1;
              unsigned int mask = xas->xa_sibs;
      
              /* XXX: no support for splitting really large entries yet */
              if (WARN_ON(xas->xa_shift + 2 * XA_CHUNK_SHIFT < order))
                      goto nomem;
              if (xas->xa_shift + XA_CHUNK_SHIFT > order)
                      return;
      
              do {
                      unsigned int i;
                      void *sibling;
                      struct xa_node *node;
      
                      node = kmem_cache_alloc(radix_tree_node_cachep, gfp);
                      if (!node)
                              goto nomem;
                      node->array = xas->xa;
                      for (i = 0; i < XA_CHUNK_SIZE; i++) {
                              if ((i & mask) == 0) {
                                      RCU_INIT_POINTER(node->slots[i], entry);
                                      sibling = xa_mk_sibling(0);
                              } else {
                                      RCU_INIT_POINTER(node->slots[i], sibling);
                              }
                      }
                      RCU_INIT_POINTER(node->parent, xas->xa_alloc);
                      xas->xa_alloc = node;
              } while (sibs-- > 0);
      
              return;
      nomem:
              xas_destroy(xas);
              xas_set_err(xas, -ENOMEM);
      }
      EXPORT_SYMBOL_GPL(xas_split_alloc);
      
      /**
       * xas_split() - Split a multi-index entry into smaller entries.
       * @xas: XArray operation state.
       * @entry: New entry to store in the array.
       * @order: New entry order.
       *
       * The value in the entry is copied to all the replacement entries.
       *
       * Context: Any context.  The caller should hold the xa_lock.
       */
      void xas_split(struct xa_state *xas, void *entry, unsigned int order)
      {
              unsigned int sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1;
              unsigned int offset, marks;
              struct xa_node *node;
              void *curr = xas_load(xas);
              int values = 0;
      
              node = xas->xa_node;
              if (xas_top(node))
                      return;
      
              marks = node_get_marks(node, xas->xa_offset);
      
              offset = xas->xa_offset + sibs;
              do {
                      if (xas->xa_shift < node->shift) {
                              struct xa_node *child = xas->xa_alloc;
      
                              xas->xa_alloc = rcu_dereference_raw(child->parent);
                              child->shift = node->shift - XA_CHUNK_SHIFT;
                              child->offset = offset;
                              child->count = XA_CHUNK_SIZE;
                              child->nr_values = xa_is_value(entry) ?
                                              XA_CHUNK_SIZE : 0;
                              RCU_INIT_POINTER(child->parent, node);
                              node_set_marks(node, offset, child, marks);
                              rcu_assign_pointer(node->slots[offset],
                                              xa_mk_node(child));
                              if (xa_is_value(curr))
                                      values--;
                      } else {
                              unsigned int canon = offset - xas->xa_sibs;
      
                              node_set_marks(node, canon, NULL, marks);
                              rcu_assign_pointer(node->slots[canon], entry);
                              while (offset > canon)
                                      rcu_assign_pointer(node->slots[offset--],
                                                      xa_mk_sibling(canon));
                              values += (xa_is_value(entry) - xa_is_value(curr)) *
                                              (xas->xa_sibs + 1);
                      }
              } while (offset-- > xas->xa_offset);
      
              node->nr_values += values;
      }
      EXPORT_SYMBOL_GPL(xas_split);
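/*
 * Usage sketch (illustrative only; "my_xa" and "index" are hypothetical).
 * Split an existing order-9 entry into order-0 entries, preallocating the
 * nodes outside the lock and performing the split under it:
 *
 *	XA_STATE_ORDER(xas, &my_xa, index, 0);
 *	void *old = xa_load(&my_xa, index);
 *
 *	xas_split_alloc(&xas, old, 9, GFP_KERNEL);
 *	if (xas_error(&xas))
 *		return xas_error(&xas);
 *	xas_lock(&xas);
 *	xas_split(&xas, old, 9);
 *	xas_unlock(&xas);
 */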
#endif /* CONFIG_XARRAY_MULTI */
      
      /**
       * xas_pause() - Pause a walk to drop a lock.
       * @xas: XArray operation state.
       *
       * Some users need to pause a walk and drop the lock they're holding in
       * order to yield to a higher priority thread or carry out an operation
       * on an entry.  Those users should call this function before they drop
       * the lock.  It resets the @xas to be suitable for the next iteration
       * of the loop after the user has reacquired the lock.  If most entries
       * found during a walk require you to call xas_pause(), the xa_for_each()
       * iterator may be more appropriate.
       *
       * Note that xas_pause() only works for forward iteration.  If a user needs
       * to pause a reverse iteration, we will need a xas_pause_rev().
       */
      void xas_pause(struct xa_state *xas)
      {
              struct xa_node *node = xas->xa_node;
      
              if (xas_invalid(xas))
                      return;
      
              xas->xa_node = XAS_RESTART;
              if (node) {
                      unsigned long offset = xas->xa_offset;
                      while (++offset < XA_CHUNK_SIZE) {
                              if (!xa_is_sibling(xa_entry(xas->xa, node, offset)))
                                      break;
                      }
                      xas->xa_index += (offset - xas->xa_offset) << node->shift;
                      if (xas->xa_index == 0)
                              xas->xa_node = XAS_BOUNDS;
              } else {
                      xas->xa_index++;
              }
      }
      EXPORT_SYMBOL_GPL(xas_pause);
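/*
 * Usage sketch (illustrative only; "my_xa" and process_entry() are
 * hypothetical).  Periodically drop the RCU lock during a long walk:
 *
 *	XA_STATE(xas, &my_xa, 0);
 *	void *entry;
 *
 *	rcu_read_lock();
 *	xas_for_each(&xas, entry, ULONG_MAX) {
 *		if (xas_retry(&xas, entry))
 *			continue;
 *		process_entry(entry);
 *		if (need_resched()) {
 *			xas_pause(&xas);
 *			rcu_read_unlock();
 *			cond_resched();
 *			rcu_read_lock();
 *		}
 *	}
 *	rcu_read_unlock();
 */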
      
      /*
       * __xas_prev() - Find the previous entry in the XArray.
       * @xas: XArray operation state.
       *
       * Helper function for xas_prev() which handles all the complex cases
       * out of line.
       */
      void *__xas_prev(struct xa_state *xas)
      {
              void *entry;
      
              if (!xas_frozen(xas->xa_node))
                      xas->xa_index--;
              if (!xas->xa_node)
                      return set_bounds(xas);
              if (xas_not_node(xas->xa_node))
                      return xas_load(xas);
      
              if (xas->xa_offset != get_offset(xas->xa_index, xas->xa_node))
                      xas->xa_offset--;
      
              while (xas->xa_offset == 255) {
                      xas->xa_offset = xas->xa_node->offset - 1;
                      xas->xa_node = xa_parent(xas->xa, xas->xa_node);
                      if (!xas->xa_node)
                              return set_bounds(xas);
              }
      
              for (;;) {
                      entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset);
                      if (!xa_is_node(entry))
                              return entry;
      
                      xas->xa_node = xa_to_node(entry);
                      xas_set_offset(xas);
              }
      }
      EXPORT_SYMBOL_GPL(__xas_prev);
      
      /*
       * __xas_next() - Find the next entry in the XArray.
       * @xas: XArray operation state.
       *
       * Helper function for xas_next() which handles all the complex cases
       * out of line.
       */
      void *__xas_next(struct xa_state *xas)
      {
              void *entry;
      
              if (!xas_frozen(xas->xa_node))
                      xas->xa_index++;
              if (!xas->xa_node)
                      return set_bounds(xas);
              if (xas_not_node(xas->xa_node))
                      return xas_load(xas);
      
              if (xas->xa_offset != get_offset(xas->xa_index, xas->xa_node))
                      xas->xa_offset++;
      
              while (xas->xa_offset == XA_CHUNK_SIZE) {
                      xas->xa_offset = xas->xa_node->offset + 1;
                      xas->xa_node = xa_parent(xas->xa, xas->xa_node);
                      if (!xas->xa_node)
                              return set_bounds(xas);
              }
      
              for (;;) {
                      entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset);
                      if (!xa_is_node(entry))
                              return entry;
      
                      xas->xa_node = xa_to_node(entry);
                      xas_set_offset(xas);
              }
      }
      EXPORT_SYMBOL_GPL(__xas_next);
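/*
 * Usage sketch (illustrative only; "my_xa", "start" and examine() are
 * hypothetical).  xas_prev() and xas_next() step one index at a time and
 * return whatever is at the new index, so this loop stops at the first
 * empty slot in a densely populated range:
 *
 *	XA_STATE(xas, &my_xa, start);
 *	void *entry;
 *
 *	rcu_read_lock();
 *	for (entry = xas_load(&xas); entry; entry = xas_next(&xas))
 *		examine(entry);
 *	rcu_read_unlock();
 */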
      
      /**
       * xas_find() - Find the next present entry in the XArray.
       * @xas: XArray operation state.
       * @max: Highest index to return.
       *
       * If the @xas has not yet been walked to an entry, return the entry
       * which has an index >= xas.xa_index.  If it has been walked, the entry
       * currently being pointed at has been processed, and so we move to the
       * next entry.
       *
       * If no entry is found and the array is smaller than @max, the iterator
       * is set to the smallest index not yet in the array.  This allows @xas
       * to be immediately passed to xas_store().
       *
       * Return: The entry, if found, otherwise %NULL.
       */
      void *xas_find(struct xa_state *xas, unsigned long max)
      {
              void *entry;
      
	if (xas_error(xas) || xas->xa_node == XAS_BOUNDS)
		return NULL;
	if (xas->xa_index > max)
		return set_bounds(xas);

	if (!xas->xa_node) {
		xas->xa_index = 1;
		return set_bounds(xas);
	} else if (xas->xa_node == XAS_RESTART) {
		entry = xas_load(xas);
		if (entry || xas_not_node(xas->xa_node))
			return entry;
	} else if (!xas->xa_node->shift &&
		    xas->xa_offset != (xas->xa_index & XA_CHUNK_MASK)) {
		xas->xa_offset = ((xas->xa_index - 1) & XA_CHUNK_MASK) + 1;
	}

	xas_advance(xas);

	while (xas->xa_node && (xas->xa_index <= max)) {
		if (unlikely(xas->xa_offset == XA_CHUNK_SIZE)) {
			xas->xa_offset = xas->xa_node->offset + 1;
			xas->xa_node = xa_parent(xas->xa, xas->xa_node);
			continue;
		}

		entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset);
		if (xa_is_node(entry)) {
			xas->xa_node = xa_to_node(entry);
			xas->xa_offset = 0;
			continue;
		}
		if (entry && !xa_is_sibling(entry))
			return entry;

		xas_advance(xas);
	}
              }
      
              if (!xas->xa_node)
                      xas->xa_node = XAS_BOUNDS;
              return NULL;
      }
      EXPORT_SYMBOL_GPL(xas_find);
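/*
 * Usage sketch (illustrative only; "my_xa" is hypothetical).  xas_find()
 * is usually reached through the xas_for_each() iterator, but may be
 * called directly; a repeated call moves past the entry just returned:
 *
 *	XA_STATE(xas, &my_xa, 0);
 *	void *entry;
 *
 *	rcu_read_lock();
 *	entry = xas_find(&xas, ULONG_MAX);
 *	while (entry) {
 *		if (!xas_retry(&xas, entry))
 *			pr_debug("index %lu -> %px\n", xas.xa_index, entry);
 *		entry = xas_find(&xas, ULONG_MAX);
 *	}
 *	rcu_read_unlock();
 */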
      
      /**
       * xas_find_marked() - Find the next marked entry in the XArray.
       * @xas: XArray operation state.
       * @max: Highest index to return.
       * @mark: Mark number to search for.
       *
       * If the @xas has not yet been walked to an entry, return the marked entry
       * which has an index >= xas.xa_index.  If it has been walked, the entry
       * currently being pointed at has been processed, and so we return the
       * first marked entry with an index > xas.xa_index.
       *
       * If no marked entry is found and the array is smaller than @max, @xas is
       * set to the bounds state and xas->xa_index is set to the smallest index
       * not yet in the array.  This allows @xas to be immediately passed to
       * xas_store().
       *
       * If no entry is found before @max is reached, @xas is set to the restart
       * state.
       *
       * Return: The entry, if found, otherwise %NULL.
       */
      void *xas_find_marked(struct xa_state *xas, unsigned long max, xa_mark_t mark)
      {
              bool advance = true;
              unsigned int offset;
              void *entry;
      
              if (xas_error(xas))
                      return NULL;
              if (xas->xa_index > max)
                      goto max;
      
              if (!xas->xa_node) {
                      xas->xa_index = 1;
                      goto out;
              } else if (xas_top(xas->xa_node)) {
                      advance = false;
                      entry = xa_head(xas->xa);
                      xas->xa_node = NULL;
                      if (xas->xa_index > max_index(entry))
                              goto out;
                      if (!xa_is_node(entry)) {
                              if (xa_marked(xas->xa, mark))
                                      return entry;
                              xas->xa_index = 1;
                              goto out;
                      }
                      xas->xa_node = xa_to_node(entry);
                      xas->xa_offset = xas->xa_index >> xas->xa_node->shift;
              }
      
              while (xas->xa_index <= max) {
                      if (unlikely(xas->xa_offset == XA_CHUNK_SIZE)) {
                              xas->xa_offset = xas->xa_node->offset + 1;
                              xas->xa_node = xa_parent(xas->xa, xas->xa_node);
                              if (!xas->xa_node)
                                      break;
                              advance = false;
                              continue;
                      }
      
                      if (!advance) {
                              entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset);
                              if (xa_is_sibling(entry)) {
                                      xas->xa_offset = xa_to_sibling(entry);
                                      xas_move_index(xas, xas->xa_offset);
                              }
                      }
      
                      offset = xas_find_chunk(xas, advance, mark);
                      if (offset > xas->xa_offset) {
                              advance = false;
                              xas_move_index(xas, offset);
                              /* Mind the wrap */
                              if ((xas->xa_index - 1) >= max)
                                      goto max;
                              xas->xa_offset = offset;
                              if (offset == XA_CHUNK_SIZE)
                                      continue;
                      }
      
                      entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset);
                      if (!entry && !(xa_track_free(xas->xa) && mark == XA_FREE_MARK))
                              continue;
                      if (!xa_is_node(entry))
                              return entry;
                      xas->xa_node = xa_to_node(entry);
                      xas_set_offset(xas);
              }
      
      out:
              if (xas->xa_index > max)
                      goto max;
              return set_bounds(xas);
      max:
              xas->xa_node = XAS_RESTART;
              return NULL;
      }
      EXPORT_SYMBOL_GPL(xas_find_marked);
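/*
 * Usage sketch (illustrative only; "my_xa" is hypothetical).  Walking only
 * the entries with XA_MARK_0 set, using the xas_for_each_marked() iterator
 * which is built on xas_find_marked():
 *
 *	XA_STATE(xas, &my_xa, 0);
 *	void *entry;
 *
 *	rcu_read_lock();
 *	xas_for_each_marked(&xas, entry, ULONG_MAX, XA_MARK_0) {
 *		if (xas_retry(&xas, entry))
 *			continue;
 *		pr_debug("marked entry at index %lu\n", xas.xa_index);
 *	}
 *	rcu_read_unlock();
 */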
      
      /**
       * xas_find_conflict() - Find the next present entry in a range.
       * @xas: XArray operation state.
       *
       * The @xas describes both a range and a position within that range.
       *
       * Context: Any context.  Expects xa_lock to be held.
       * Return: The next entry in the range covered by @xas or %NULL.
       */
      void *xas_find_conflict(struct xa_state *xas)
      {
              void *curr;
      
	if (xas_error(xas))
		return NULL;

	if (!xas->xa_node)
		return NULL;

	if (xas_top(xas->xa_node)) {
		curr = xas_start(xas);
		if (!curr)
			return NULL;
		while (xa_is_node(curr)) {
			struct xa_node *node = xa_to_node(curr);
			curr = xas_descend(xas, node);
		}
		if (curr)
			return curr;
	}

	if (xas->xa_node->shift > xas->xa_shift)
		return NULL;

	for (;;) {
		if (xas->xa_node->shift == xas->xa_shift) {
			if ((xas->xa_offset & xas->xa_sibs) == xas->xa_sibs)
				break;
		} else if (xas->xa_offset == XA_CHUNK_MASK) {
			xas->xa_offset = xas->xa_node->offset;
			xas->xa_node = xa_parent_locked(xas->xa, xas->xa_node);
			if (!xas->xa_node)
				break;
			continue;
		}
		curr = xa_entry_locked(xas->xa, xas->xa_node, ++xas->xa_offset);
		if (xa_is_sibling(curr))
			continue;
		while (xa_is_node(curr)) {
			xas->xa_node = xa_to_node(curr);
			xas->xa_offset = 0;
			curr = xa_entry_locked(xas->xa, xas->xa_node, 0);
		}
		if (curr)
			return curr;
	}
	xas->xa_offset -= xas->xa_sibs;
	return NULL;
      }
      EXPORT_SYMBOL_GPL(xas_find_conflict);
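/*
 * Usage sketch (illustrative only; "my_xa", "index" and "order" are
 * hypothetical).  Before storing a multi-index entry, check whether any
 * part of the target range is already occupied:
 *
 *	XA_STATE_ORDER(xas, &my_xa, index, order);
 *	void *curr;
 *
 *	xas_lock(&xas);
 *	xas_for_each_conflict(&xas, curr)
 *		pr_debug("range already contains %px\n", curr);
 *	xas_unlock(&xas);
 */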
      
      /**
       * xa_load() - Load an entry from an XArray.
       * @xa: XArray.
       * @index: index into array.
       *
       * Context: Any context.  Takes and releases the RCU lock.
       * Return: The entry at @index in @xa.
       */
      void *xa_load(struct xarray *xa, unsigned long index)
      {
              XA_STATE(xas, xa, index);
              void *entry;
      
              rcu_read_lock();
              do {
                      entry = xas_load(&xas);
                      if (xa_is_zero(entry))
                              entry = NULL;
              } while (xas_retry(&xas, entry));
              rcu_read_unlock();
      
              return entry;
      }
      EXPORT_SYMBOL(xa_load);
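/*
 * Usage sketch (illustrative only; "my_xa" holds struct foo pointers and
 * both names are hypothetical):
 *
 *	struct foo *p = xa_load(&my_xa, 42);
 *
 * A NULL return means nothing is stored at index 42.
 */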
      
      static void *xas_result(struct xa_state *xas, void *curr)
      {
              if (xa_is_zero(curr))
                      return NULL;
              if (xas_error(xas))
                      curr = xas->xa_node;
              return curr;
      }
      
      /**
       * __xa_erase() - Erase this entry from the XArray while locked.
       * @xa: XArray.
       * @index: Index into array.
       *
       * After this function returns, loading from @index will return %NULL.
       * If the index is part of a multi-index entry, all indices will be erased
       * and none of the entries will be part of a multi-index entry.
       *
       * Context: Any context.  Expects xa_lock to be held on entry.
       * Return: The entry which used to be at this index.
       */
      void *__xa_erase(struct xarray *xa, unsigned long index)
      {
              XA_STATE(xas, xa, index);
              return xas_result(&xas, xas_store(&xas, NULL));
      }
      EXPORT_SYMBOL(__xa_erase);
      
      /**
       * xa_erase() - Erase this entry from the XArray.
       * @xa: XArray.
       * @index: Index of entry.
       *
       * After this function returns, loading from @index will return %NULL.
       * If the index is part of a multi-index entry, all indices will be erased
       * and none of the entries will be part of a multi-index entry.
       *
       * Context: Any context.  Takes and releases the xa_lock.
       * Return: The entry which used to be at this index.
       */
      void *xa_erase(struct xarray *xa, unsigned long index)
      {
              void *entry;
      
              xa_lock(xa);
              entry = __xa_erase(xa, index);
              xa_unlock(xa);
      
              return entry;
      }
      EXPORT_SYMBOL(xa_erase);
      
      /**
       * __xa_store() - Store this entry in the XArray.
       * @xa: XArray.
       * @index: Index into array.
       * @entry: New entry.
       * @gfp: Memory allocation flags.
       *
       * You must already be holding the xa_lock when calling this function.
       * It will drop the lock if needed to allocate memory, and then reacquire
       * it afterwards.
       *
       * Context: Any context.  Expects xa_lock to be held on entry.  May
       * release and reacquire xa_lock if @gfp flags permit.
       * Return: The old entry at this index or xa_err() if an error happened.
       */
      void *__xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp)
      {
              XA_STATE(xas, xa, index);
              void *curr;
      
              if (WARN_ON_ONCE(xa_is_advanced(entry)))
                      return XA_ERROR(-EINVAL);
              if (xa_track_free(xa) && !entry)
                      entry = XA_ZERO_ENTRY;
      
              do {
                      curr = xas_store(&xas, entry);
                      if (xa_track_free(xa))
                              xas_clear_mark(&xas, XA_FREE_MARK);
              } while (__xas_nomem(&xas, gfp));
      
              return xas_result(&xas, curr);
      }
      EXPORT_SYMBOL(__xa_store);
      
      /**
       * xa_store() - Store this entry in the XArray.
       * @xa: XArray.
       * @index: Index into array.
       * @entry: New entry.
       * @gfp: Memory allocation flags.
       *
       * After this function returns, loads from this index will return @entry.
       * Storing into an existing multi-index entry updates the entry of every index.
       * The marks associated with @index are unaffected unless @entry is %NULL.
       *
       * Context: Any context.  Takes and releases the xa_lock.
       * May sleep if the @gfp flags permit.
       * Return: The old entry at this index on success, xa_err(-EINVAL) if @entry
       * cannot be stored in an XArray, or xa_err(-ENOMEM) if memory allocation
       * failed.
       */
      void *xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp)
      {
              void *curr;
      
              xa_lock(xa);
              curr = __xa_store(xa, index, entry, gfp);
              xa_unlock(xa);
      
              return curr;
      }
      EXPORT_SYMBOL(xa_store);
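/*
 * Usage sketch (illustrative only; "my_xa" and "p" are hypothetical).
 * xa_store() returns the previous entry, or an error encoded with
 * xa_err():
 *
 *	void *old = xa_store(&my_xa, 42, p, GFP_KERNEL);
 *
 *	if (xa_is_err(old))
 *		return xa_err(old);
 */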
      
      /**
 * __xa_cmpxchg() - Conditionally replace an entry in the XArray.
       * @xa: XArray.
       * @index: Index into array.
       * @old: Old value to test against.
       * @entry: New entry.
       * @gfp: Memory allocation flags.
       *
 * If the entry at @index is equal to @old, it is replaced with @entry.
 * You must already be holding the xa_lock when calling this function.
 * It will drop the lock if needed to allocate memory, and then reacquire
 * it afterwards.
       *
       * Context: Any context.  Expects xa_lock to be held on entry.  May
       * release and reacquire xa_lock if @gfp flags permit.
       * Return: The old entry at this index or xa_err() if an error happened.
       */
      void *__xa_cmpxchg(struct xarray *xa, unsigned long index,
                              void *old, void *entry, gfp_t gfp)
      {
              XA_STATE(xas, xa, index);
              void *curr;
      
              if (WARN_ON_ONCE(xa_is_advanced(entry)))
                      return XA_ERROR(-EINVAL);
      
              do {
                      curr = xas_load(&xas);
                      if (curr == old) {
                              xas_store(&xas, entry);
                              if (xa_track_free(xa) && entry && !curr)
                                      xas_clear_mark(&xas, XA_FREE_MARK);
                      }
              } while (__xas_nomem(&xas, gfp));
      
              return xas_result(&xas, curr);
      }
      EXPORT_SYMBOL(__xa_cmpxchg);
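/*
 * Usage sketch (illustrative only; "my_xa", "old" and "new" are
 * hypothetical).  The unlocked xa_cmpxchg() wrapper follows the usual
 * compare-exchange convention of returning the previous contents:
 *
 *	void *curr = xa_cmpxchg(&my_xa, 42, old, new, GFP_KERNEL);
 *
 * The exchange succeeded iff curr == old; an allocation failure is
 * reported as an error entry, tested with xa_is_err(curr).
 */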
      
      /**
       * __xa_insert() - Store this entry in the XArray if no entry is present.
       * @xa: XArray.
       * @index: Index into array.
       * @entry: New entry.
       * @gfp: Memory allocation flags.
       *
       * Inserting a NULL entry will store a reserved entry (like xa_reserve())
       * if no entry is present.  Inserting will fail if a reserved entry is
       * present, even though loading from this index will return NULL.
       *
       * Context: Any context.  Expects xa_lock to be held on entry.  May
       * release and reacquire xa_lock if @gfp flags permit.
       * Return: 0 if the store succeeded.  -EBUSY if another entry was present.
       * -ENOMEM if memory could not be allocated.
       */
      int __xa_insert(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp)
      {
              XA_STATE(xas, xa, index);
              void *curr;
      
              if (WARN_ON_ONCE(xa_is_advanced(entry)))
                      return -EINVAL;
              if (!entry)
                      entry = XA_ZERO_ENTRY;
      
              do {
                      curr = xas_load(&xas);
                      if (!curr) {
                              xas_store(&xas, entry);
                              if (xa_track_free(xa))
                                      xas_clear_mark(&xas, XA_FREE_MARK);
                      } else {
                              xas_set_err(&xas, -EBUSY);
                      }
              } while (__xas_nomem(&xas, gfp));
      
              return xas_error(&xas);
      }
      EXPORT_SYMBOL(__xa_insert);
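/*
 * Usage sketch (illustrative only; "my_xa" and "p" are hypothetical).
 * The unlocked xa_insert() wrapper only succeeds if the index is empty:
 *
 *	int err = xa_insert(&my_xa, 42, p, GFP_KERNEL);
 *
 * err is 0 on success, -EBUSY if index 42 was already occupied (or
 * reserved), or -ENOMEM if memory could not be allocated.
 */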
      
      #ifdef CONFIG_XARRAY_MULTI
      static void xas_set_range(struct xa_state *xas, unsigned long first,
                      unsigned long last)
      {
              unsigned int shift = 0;
              unsigned long sibs = last - first;
              unsigned int offset = XA_CHUNK_MASK;
      
              xas_set(xas, first);
      
              while ((first & XA_CHUNK_MASK) == 0) {
                      if (sibs < XA_CHUNK_MASK)
                              break;
                      if ((sibs == XA_CHUNK_MASK) && (offset < XA_CHUNK_MASK))
                              break;
                      shift += XA_CHUNK_SHIFT;
                      if (offset == XA_CHUNK_MASK)
                              offset = sibs & XA_CHUNK_MASK;
                      sibs >>= XA_CHUNK_SHIFT;
                      first >>= XA_CHUNK_SHIFT;
              }
      
              offset = first & XA_CHUNK_MASK;
              if (offset + sibs > XA_CHUNK_MASK)
                      sibs = XA_CHUNK_MASK - offset;
              if ((((first + sibs + 1) << shift) - 1) > last)
                      sibs -= 1;
      
              xas->xa_shift = shift;
              xas->xa_sibs = sibs;
      }
      
      /**
       * xa_store_range() - Store this entry at a range of indices in the XArray.
       * @xa: XArray.
       * @first: First index to affect.
       * @last: Last index to affect.
       * @entry: New entry.
       * @gfp: Memory allocation flags.
       *
 * After this function returns, loads from any index between @first and @last,
 * inclusive, will return @entry.
 * Storing into an existing multi-index entry updates the entry of every index.
 * The marks associated with these indices are unaffected unless @entry is %NULL.
       *
       * Context: Process context.  Takes and releases the xa_lock.  May sleep
       * if the @gfp flags permit.
       * Return: %NULL on success, xa_err(-EINVAL) if @entry cannot be stored in
       * an XArray, or xa_err(-ENOMEM) if memory allocation failed.
       */
      void *xa_store_range(struct xarray *xa, unsigned long first,
                      unsigned long last, void *entry, gfp_t gfp)
      {
              XA_STATE(xas, xa, 0);
      
              if (WARN_ON_ONCE(xa_is_internal(entry)))
                      return XA_ERROR(-EINVAL);
              if (last < first)
                      return XA_ERROR(-EINVAL);
      
              do {
                      xas_lock(&xas);
                      if (entry) {
                              unsigned int order = BITS_PER_LONG;
                              if (last + 1)
                                      order = __ffs(last + 1);
                              xas_set_order(&xas, last, order);
                              xas_create(&xas, true);
                              if (xas_error(&xas))
                                      goto unlock;
                      }
                      do {
                              xas_set_range(&xas, first, last);
                              xas_store(&xas, entry);
                              if (xas_error(&xas))
                                      goto unlock;
                              first += xas_size(&xas);
                      } while (first <= last);
      unlock:
                      xas_unlock(&xas);
              } while (xas_nomem(&xas, gfp));
      
              return xas_result(&xas, NULL);
      }
      EXPORT_SYMBOL(xa_store_range);
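/*
 * Usage sketch (illustrative only; "my_xa" and "p" are hypothetical).
 * Make indices 64-127 all return the same pointer:
 *
 *	void *ret = xa_store_range(&my_xa, 64, 127, p, GFP_KERNEL);
 *
 *	if (xa_is_err(ret))
 *		return xa_err(ret);
 *
 * Afterwards xa_load(&my_xa, 100) returns p.
 */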
      
      /**
       * xa_get_order() - Get the order of an entry.
       * @xa: XArray.
       * @index: Index of the entry.
       *
       * Return: A number between 0 and 63 indicating the order of the entry.
       */
      int xa_get_order(struct xarray *xa, unsigned long index)
      {
              XA_STATE(xas, xa, index);
              void *entry;
              int order = 0;
      
              rcu_read_lock();
              entry = xas_load(&xas);
      
              if (!entry)
                      goto unlock;
      
              if (!xas.xa_node)
                      goto unlock;
      
              for (;;) {
                      unsigned int slot = xas.xa_offset + (1 << order);
      
                      if (slot >= XA_CHUNK_SIZE)
                              break;
                      if (!xa_is_sibling(xas.xa_node->slots[slot]))
                              break;
                      order++;
              }
      
              order += xas.xa_node->shift;
      unlock:
              rcu_read_unlock();
      
              return order;
      }
      EXPORT_SYMBOL(xa_get_order);
      #endif /* CONFIG_XARRAY_MULTI */
      
      /**
       * __xa_alloc() - Find somewhere to store this entry in the XArray.
       * @xa: XArray.
       * @id: Pointer to ID.
       * @limit: Range for allocated ID.
       * @entry: New entry.
       * @gfp: Memory allocation flags.
       *
       * Finds an empty entry in @xa between @limit.min and @limit.max,
       * stores the index into the @id pointer, then stores the entry at
       * that index.  A concurrent lookup will not see an uninitialised @id.
       *
       * Context: Any context.  Expects xa_lock to be held on entry.  May
       * release and reacquire xa_lock if @gfp flags permit.
       * Return: 0 on success, -ENOMEM if memory could not be allocated or
       * -EBUSY if there are no free entries in @limit.
       */
      int __xa_alloc(struct xarray *xa, u32 *id, void *entry,
                      struct xa_limit limit, gfp_t gfp)
      {
              XA_STATE(xas, xa, 0);
      
              if (WARN_ON_ONCE(xa_is_advanced(entry)))
                      return -EINVAL;
              if (WARN_ON_ONCE(!xa_track_free(xa)))
                      return -EINVAL;
      
              if (!entry)
                      entry = XA_ZERO_ENTRY;
      
              do {
                      xas.xa_index = limit.min;
                      xas_find_marked(&xas, limit.max, XA_FREE_MARK);
                      if (xas.xa_node == XAS_RESTART)
                              xas_set_err(&xas, -EBUSY);
                      else
                              *id = xas.xa_index;
                      xas_store(&xas, entry);
                      xas_clear_mark(&xas, XA_FREE_MARK);
              } while (__xas_nomem(&xas, gfp));
      
              return xas_error(&xas);
      }
      EXPORT_SYMBOL(__xa_alloc);
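/*
 * Usage sketch (illustrative only; "my_xa" and "p" are hypothetical; the
 * array must have been defined with DEFINE_XARRAY_ALLOC() or created with
 * XA_FLAGS_ALLOC).  The locked __xa_alloc() is normally reached through
 * the xa_alloc() wrapper:
 *
 *	u32 id;
 *	int err = xa_alloc(&my_xa, &id, p, xa_limit_32b, GFP_KERNEL);
 *
 *	if (err)
 *		return err;
 *
 * On success, p is stored at the freshly allocated index "id".
 */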
      
      /**
       * __xa_alloc_cyclic() - Find somewhere to store this entry in the XArray.
       * @xa: XArray.
       * @id: Pointer to ID.
       * @entry: New entry.
       * @limit: Range of allocated ID.
       * @next: Pointer to next ID to allocate.
       * @gfp: Memory allocation flags.
       *
       * Finds an empty entry in @xa between @limit.min and @limit.max,
       * stores the index into the @id pointer, then stores the entry at
       * that index.  A concurrent lookup will not see an uninitialised @id.
       * The search for an empty entry will start at @next and will wrap
       * around if necessary.
       *
       * Context: Any context.  Expects xa_lock to be held on entry.  May
       * release and reacquire xa_lock if @gfp flags permit.
       * Return: 0 if the allocation succeeded without wrapping.  1 if the
       * allocation succeeded after wrapping, -ENOMEM if memory could not be
       * allocated or -EBUSY if there are no free entries in @limit.
       */
      int __xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry,
                      struct xa_limit limit, u32 *next, gfp_t gfp)
      {
              u32 min = limit.min;
              int ret;
      
              limit.min = max(min, *next);
              ret = __xa_alloc(xa, id, entry, limit, gfp);
              if ((xa->xa_flags & XA_FLAGS_ALLOC_WRAPPED) && ret == 0) {
                      xa->xa_flags &= ~XA_FLAGS_ALLOC_WRAPPED;
                      ret = 1;
              }
      
              if (ret < 0 && limit.min > min) {
                      limit.min = min;
                      ret = __xa_alloc(xa, id, entry, limit, gfp);
                      if (ret == 0)
                              ret = 1;
              }
      
              if (ret >= 0) {
                      *next = *id + 1;
                      if (*next == 0)
                              xa->xa_flags |= XA_FLAGS_ALLOC_WRAPPED;
              }
              return ret;
      }
      EXPORT_SYMBOL(__xa_alloc_cyclic);
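/*
 * Usage sketch (illustrative only; "my_xa", "my_next" and "p" are
 * hypothetical).  Cyclic allocation remembers where the previous search
 * finished so that recently freed IDs are not immediately reused:
 *
 *	static u32 my_next;
 *	u32 id;
 *	int err = xa_alloc_cyclic(&my_xa, &id, p, xa_limit_32b, &my_next,
 *				  GFP_KERNEL);
 *
 *	if (err < 0)
 *		return err;
 *
 * err is 1 rather than 0 if the search wrapped around to limit.min.
 */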
      
      /**
       * __xa_set_mark() - Set this mark on this entry while locked.
       * @xa: XArray.
       * @index: Index of entry.
       * @mark: Mark number.
       *
       * Attempting to set a mark on a %NULL entry does not succeed.
       *
       * Context: Any context.  Expects xa_lock to be held on entry.
       */
      void __xa_set_mark(struct xarray *xa, unsigned long index, xa_mark_t mark)
      {
              XA_STATE(xas, xa, index);
              void *entry = xas_load(&xas);
      
              if (entry)
                      xas_set_mark(&xas, mark);
      }
      EXPORT_SYMBOL(__xa_set_mark);
      
      /**
       * __xa_clear_mark() - Clear this mark on this entry while locked.
       * @xa: XArray.
       * @index: Index of entry.
       * @mark: Mark number.
       *
       * Context: Any context.  Expects xa_lock to be held on entry.
       */
      void __xa_clear_mark(struct xarray *xa, unsigned long index, xa_mark_t mark)
      {
              XA_STATE(xas, xa, index);
              void *entry = xas_load(&xas);
      
              if (entry)
                      xas_clear_mark(&xas, mark);
      }
      EXPORT_SYMBOL(__xa_clear_mark);
      
      /**
       * xa_get_mark() - Inquire whether this mark is set on this entry.
       * @xa: XArray.
       * @index: Index of entry.
       * @mark: Mark number.
       *
       * This function uses the RCU read lock, so the result may be out of date
       * by the time it returns.  If you need the result to be stable, use a lock.
       *
       * Context: Any context.  Takes and releases the RCU lock.
       * Return: True if the entry at @index has this mark set, false if it doesn't.
       */
      bool xa_get_mark(struct xarray *xa, unsigned long index, xa_mark_t mark)
      {
              XA_STATE(xas, xa, index);
              void *entry;
      
              rcu_read_lock();
              entry = xas_start(&xas);
              while (xas_get_mark(&xas, mark)) {
                      if (!xa_is_node(entry))
                              goto found;
                      entry = xas_descend(&xas, xa_to_node(entry));
              }
              rcu_read_unlock();
              return false;
       found:
              rcu_read_unlock();
              return true;
      }
      EXPORT_SYMBOL(xa_get_mark);
      
      /**
       * xa_set_mark() - Set this mark on this entry.
       * @xa: XArray.
       * @index: Index of entry.
       * @mark: Mark number.
       *
       * Attempting to set a mark on a %NULL entry does not succeed.
       *
       * Context: Process context.  Takes and releases the xa_lock.
       */
      void xa_set_mark(struct xarray *xa, unsigned long index, xa_mark_t mark)
      {
              xa_lock(xa);
              __xa_set_mark(xa, index, mark);
              xa_unlock(xa);
      }
      EXPORT_SYMBOL(xa_set_mark);
      
      /**
       * xa_clear_mark() - Clear this mark on this entry.
       * @xa: XArray.
       * @index: Index of entry.
       * @mark: Mark number.
       *
       * Clearing a mark always succeeds.
       *
       * Context: Process context.  Takes and releases the xa_lock.
       */
      void xa_clear_mark(struct xarray *xa, unsigned long index, xa_mark_t mark)
      {
              xa_lock(xa);
              __xa_clear_mark(xa, index, mark);
              xa_unlock(xa);
      }
      EXPORT_SYMBOL(xa_clear_mark);
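/*
 * Usage sketch (illustrative only; "my_xa" is hypothetical).  Marks are
 * per-entry flag bits which can be searched efficiently:
 *
 *	unsigned long index;
 *	void *entry;
 *
 *	xa_set_mark(&my_xa, 42, XA_MARK_0);
 *	xa_for_each_marked(&my_xa, index, entry, XA_MARK_0)
 *		pr_debug("index %lu is marked\n", index);
 *	xa_clear_mark(&my_xa, 42, XA_MARK_0);
 */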
      
      /**
       * xa_find() - Search the XArray for an entry.
       * @xa: XArray.
       * @indexp: Pointer to an index.
       * @max: Maximum index to search to.
       * @filter: Selection criterion.
       *
       * Finds the entry in @xa which matches the @filter, and has the lowest
       * index that is at least @indexp and no more than @max.
       * If an entry is found, @indexp is updated to be the index of the entry.
       * This function is protected by the RCU read lock, so it may not find
       * entries which are being simultaneously added.  It will not return an
       * %XA_RETRY_ENTRY; if you need to see retry entries, use xas_find().
       *
       * Context: Any context.  Takes and releases the RCU lock.
       * Return: The entry, if found, otherwise %NULL.
       */
      void *xa_find(struct xarray *xa, unsigned long *indexp,
                              unsigned long max, xa_mark_t filter)
      {
              XA_STATE(xas, xa, *indexp);
              void *entry;
      
              rcu_read_lock();
              do {
                      if ((__force unsigned int)filter < XA_MAX_MARKS)
                              entry = xas_find_marked(&xas, max, filter);
                      else
                              entry = xas_find(&xas, max);
              } while (xas_retry(&xas, entry));
              rcu_read_unlock();
      
              if (entry)
                      *indexp = xas.xa_index;
              return entry;
      }
      EXPORT_SYMBOL(xa_find);
      
      static bool xas_sibling(struct xa_state *xas)
      {
              struct xa_node *node = xas->xa_node;
              unsigned long mask;
      
              if (!IS_ENABLED(CONFIG_XARRAY_MULTI) || !node)
                      return false;
              mask = (XA_CHUNK_SIZE << node->shift) - 1;
              return (xas->xa_index & mask) >
                      ((unsigned long)xas->xa_offset << node->shift);
      }
      
      /**
       * xa_find_after() - Search the XArray for a present entry.
       * @xa: XArray.
       * @indexp: Pointer to an index.
       * @max: Maximum index to search to.
       * @filter: Selection criterion.
       *
       * Finds the entry in @xa which matches the @filter and has the lowest
       * index that is above @indexp and no more than @max.
       * If an entry is found, @indexp is updated to be the index of the entry.
       * This function is protected by the RCU read lock, so it may miss entries
       * which are being simultaneously added.  It will not return an
       * %XA_RETRY_ENTRY; if you need to see retry entries, use xas_find().
       *
       * Context: Any context.  Takes and releases the RCU lock.
       * Return: The pointer, if found, otherwise %NULL.
       */
      void *xa_find_after(struct xarray *xa, unsigned long *indexp,
                              unsigned long max, xa_mark_t filter)
      {
              XA_STATE(xas, xa, *indexp + 1);
              void *entry;
      
              if (xas.xa_index == 0)
                      return NULL;
      
              rcu_read_lock();
              for (;;) {
                      if ((__force unsigned int)filter < XA_MAX_MARKS)
                              entry = xas_find_marked(&xas, max, filter);
                      else
                              entry = xas_find(&xas, max);
      
                      if (xas_invalid(&xas))
                              break;
                      if (xas_sibling(&xas))
                              continue;
                      if (!xas_retry(&xas, entry))
                              break;
              }
              rcu_read_unlock();
      
              if (entry)
                      *indexp = xas.xa_index;
              return entry;
      }
      EXPORT_SYMBOL(xa_find_after);
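/*
 * Usage sketch (illustrative only; "my_xa" is hypothetical).  xa_find()
 * and xa_find_after() are the building blocks of the xa_for_each()
 * iterator:
 *
 *	unsigned long index = 0;
 *	void *entry = xa_find(&my_xa, &index, ULONG_MAX, XA_PRESENT);
 *
 *	while (entry) {
 *		pr_debug("index %lu -> %px\n", index, entry);
 *		entry = xa_find_after(&my_xa, &index, ULONG_MAX, XA_PRESENT);
 *	}
 */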
      
      static unsigned int xas_extract_present(struct xa_state *xas, void **dst,
                              unsigned long max, unsigned int n)
      {
              void *entry;
              unsigned int i = 0;
      
              rcu_read_lock();
              xas_for_each(xas, entry, max) {
                      if (xas_retry(xas, entry))
                              continue;
                      dst[i++] = entry;
                      if (i == n)
                              break;
              }
              rcu_read_unlock();
      
              return i;
      }
      
      static unsigned int xas_extract_marked(struct xa_state *xas, void **dst,
                              unsigned long max, unsigned int n, xa_mark_t mark)
      {
              void *entry;
              unsigned int i = 0;
      
              rcu_read_lock();
              xas_for_each_marked(xas, entry, max, mark) {
                      if (xas_retry(xas, entry))
                              continue;
                      dst[i++] = entry;
                      if (i == n)
                              break;
              }
              rcu_read_unlock();
      
              return i;
      }
      
      /**
       * xa_extract() - Copy selected entries from the XArray into a normal array.
       * @xa: The source XArray to copy from.
       * @dst: The buffer to copy entries into.
       * @start: The first index in the XArray eligible to be selected.
       * @max: The last index in the XArray eligible to be selected.
       * @n: The maximum number of entries to copy.
       * @filter: Selection criterion.
       *
       * Copies up to @n entries that match @filter from the XArray.  The
       * copied entries will have indices between @start and @max, inclusive.
       *
       * The @filter may be an XArray mark value, in which case entries which are
       * marked with that mark will be copied.  It may also be %XA_PRESENT, in
       * which case all entries which are not %NULL will be copied.
       *
       * The entries returned may not represent a snapshot of the XArray at a
       * moment in time.  For example, if another thread stores to index 5, then
       * index 10, calling xa_extract() may return the old contents of index 5
       * and the new contents of index 10.  Indices not modified while this
       * function is running will not be skipped.
       *
       * If you need stronger guarantees, holding the xa_lock across calls to this
       * function will prevent concurrent modification.
       *
       * Context: Any context.  Takes and releases the RCU lock.
       * Return: The number of entries copied.
       */
      unsigned int xa_extract(struct xarray *xa, void **dst, unsigned long start,
                              unsigned long max, unsigned int n, xa_mark_t filter)
      {
              XA_STATE(xas, xa, start);
      
              if (!n)
                      return 0;
      
              if ((__force unsigned int)filter < XA_MAX_MARKS)
                      return xas_extract_marked(&xas, dst, max, n, filter);
              return xas_extract_present(&xas, dst, max, n);
      }
      EXPORT_SYMBOL(xa_extract);
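/*
 * Usage sketch (illustrative only; "my_xa" is hypothetical).  Copy up to
 * 16 present entries from the first 1024 indices into a local buffer:
 *
 *	void *batch[16];
 *	unsigned int n;
 *
 *	n = xa_extract(&my_xa, batch, 0, 1023, ARRAY_SIZE(batch), XA_PRESENT);
 *
 * n holds the number of entries actually copied into batch[].
 */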
      
      /**
       * xa_delete_node() - Private interface for workingset code.
       * @node: Node to be removed from the tree.
       * @update: Function to call to update ancestor nodes.
       *
       * Context: xa_lock must be held on entry and will not be released.
       */
      void xa_delete_node(struct xa_node *node, xa_update_node_t update)
      {
              struct xa_state xas = {
                      .xa = node->array,
                      .xa_index = (unsigned long)node->offset <<
                                      (node->shift + XA_CHUNK_SHIFT),
                      .xa_shift = node->shift + XA_CHUNK_SHIFT,
                      .xa_offset = node->offset,
                      .xa_node = xa_parent_locked(node->array, node),
                      .xa_update = update,
              };
      
              xas_store(&xas, NULL);
      }
      EXPORT_SYMBOL_GPL(xa_delete_node);        /* For the benefit of the test suite */
      
      /**
       * xa_destroy() - Free all internal data structures.
       * @xa: XArray.
       *
       * After calling this function, the XArray is empty and has freed all memory
       * allocated for its internal data structures.  You are responsible for
       * freeing the objects referenced by the XArray.
       *
       * Context: Any context.  Takes and releases the xa_lock, interrupt-safe.
       */
      void xa_destroy(struct xarray *xa)
      {
              XA_STATE(xas, xa, 0);
              unsigned long flags;
              void *entry;
      
              xas.xa_node = NULL;
              xas_lock_irqsave(&xas, flags);
              entry = xa_head_locked(xa);
              RCU_INIT_POINTER(xa->xa_head, NULL);
              xas_init_marks(&xas);
              if (xa_zero_busy(xa))
                      xa_mark_clear(xa, XA_FREE_MARK);
              /* lockdep checks we're still holding the lock in xas_free_nodes() */
              if (xa_is_node(entry))
                      xas_free_nodes(&xas, xa_to_node(entry));
              xas_unlock_irqrestore(&xas, flags);
      }
      EXPORT_SYMBOL(xa_destroy);
      
      #ifdef XA_DEBUG
      void xa_dump_node(const struct xa_node *node)
      {
              unsigned i, j;
      
              if (!node)
                      return;
              if ((unsigned long)node & 3) {
                      pr_cont("node %px\n", node);
                      return;
              }
      
              pr_cont("node %px %s %d parent %px shift %d count %d values %d "
                      "array %px list %px %px marks",
                      node, node->parent ? "offset" : "max", node->offset,
                      node->parent, node->shift, node->count, node->nr_values,
                      node->array, node->private_list.prev, node->private_list.next);
              for (i = 0; i < XA_MAX_MARKS; i++)
                      for (j = 0; j < XA_MARK_LONGS; j++)
                              pr_cont(" %lx", node->marks[i][j]);
              pr_cont("\n");
      }
      
      void xa_dump_index(unsigned long index, unsigned int shift)
      {
              if (!shift)
                      pr_info("%lu: ", index);
              else if (shift >= BITS_PER_LONG)
                      pr_info("0-%lu: ", ~0UL);
              else
                      pr_info("%lu-%lu: ", index, index | ((1UL << shift) - 1));
      }
      
      void xa_dump_entry(const void *entry, unsigned long index, unsigned long shift)
      {
              if (!entry)
                      return;
      
              xa_dump_index(index, shift);
      
              if (xa_is_node(entry)) {
                      if (shift == 0) {
                              pr_cont("%px\n", entry);
                      } else {
                              unsigned long i;
                              struct xa_node *node = xa_to_node(entry);
                              xa_dump_node(node);
                              for (i = 0; i < XA_CHUNK_SIZE; i++)
                                      xa_dump_entry(node->slots[i],
                                            index + (i << node->shift), node->shift);
                      }
              } else if (xa_is_value(entry))
                      pr_cont("value %ld (0x%lx) [%px]\n", xa_to_value(entry),
                                                      xa_to_value(entry), entry);
              else if (!xa_is_internal(entry))
                      pr_cont("%px\n", entry);
              else if (xa_is_retry(entry))
                      pr_cont("retry (%ld)\n", xa_to_internal(entry));
              else if (xa_is_sibling(entry))
                      pr_cont("sibling (slot %ld)\n", xa_to_sibling(entry));
              else if (xa_is_zero(entry))
                      pr_cont("zero (%ld)\n", xa_to_internal(entry));
              else
                      pr_cont("UNKNOWN ENTRY (%px)\n", entry);
      }
      
      void xa_dump(const struct xarray *xa)
      {
              void *entry = xa->xa_head;
              unsigned int shift = 0;
      
              pr_info("xarray: %px head %px flags %x marks %d %d %d\n", xa, entry,
                              xa->xa_flags, xa_marked(xa, XA_MARK_0),
                              xa_marked(xa, XA_MARK_1), xa_marked(xa, XA_MARK_2));
              if (xa_is_node(entry))
                      shift = xa_to_node(entry)->shift + XA_CHUNK_SHIFT;
              xa_dump_entry(entry, 0, shift);
      }
      #endif
      /* SPDX-License-Identifier: GPL-2.0 */
      #ifndef __LINUX_SPINLOCK_H
      #define __LINUX_SPINLOCK_H
      
      /*
       * include/linux/spinlock.h - generic spinlock/rwlock declarations
       *
       * here's the role of the various spinlock/rwlock related include files:
       *
       * on SMP builds:
       *
       *  asm/spinlock_types.h: contains the arch_spinlock_t/arch_rwlock_t and the
       *                        initializers
       *
       *  linux/spinlock_types.h:
       *                        defines the generic type and initializers
       *
       *  asm/spinlock.h:       contains the arch_spin_*()/etc. lowlevel
       *                        implementations, mostly inline assembly code
       *
       *   (also included on UP-debug builds:)
       *
       *  linux/spinlock_api_smp.h:
       *                        contains the prototypes for the _spin_*() APIs.
       *
       *  linux/spinlock.h:     builds the final spin_*() APIs.
       *
       * on UP builds:
       *
       *  linux/spinlock_type_up.h:
       *                        contains the generic, simplified UP spinlock type.
       *                        (which is an empty structure on non-debug builds)
       *
       *  linux/spinlock_types.h:
       *                        defines the generic type and initializers
       *
       *  linux/spinlock_up.h:
       *                        contains the arch_spin_*()/etc. version of UP
       *                        builds. (which are NOPs on non-debug, non-preempt
       *                        builds)
       *
       *   (included on UP-non-debug builds:)
       *
       *  linux/spinlock_api_up.h:
       *                        builds the _spin_*() APIs.
       *
       *  linux/spinlock.h:     builds the final spin_*() APIs.
       */
      
      #include <linux/typecheck.h>
      #include <linux/preempt.h>
      #include <linux/linkage.h>
      #include <linux/compiler.h>
      #include <linux/irqflags.h>
      #include <linux/thread_info.h>
      #include <linux/kernel.h>
      #include <linux/stringify.h>
      #include <linux/bottom_half.h>
      #include <linux/lockdep.h>
      #include <asm/barrier.h>
      #include <asm/mmiowb.h>
      
      
      /*
       * Must define these before including other files, inline functions need them
       */
      #define LOCK_SECTION_NAME ".text..lock."KBUILD_BASENAME
      
      #define LOCK_SECTION_START(extra)               \
              ".subsection 1\n\t"                     \
              extra                                   \
              ".ifndef " LOCK_SECTION_NAME "\n\t"     \
              LOCK_SECTION_NAME ":\n\t"               \
              ".endif\n"
      
      #define LOCK_SECTION_END                        \
              ".previous\n\t"
      
      #define __lockfunc __section(".spinlock.text")
      
      /*
       * Pull the arch_spinlock_t and arch_rwlock_t definitions:
       */
      #include <linux/spinlock_types.h>
      
      /*
       * Pull the arch_spin*() functions/declarations (UP-nondebug doesn't need them):
       */
      #ifdef CONFIG_SMP
      # include <asm/spinlock.h>
      #else
      # include <linux/spinlock_up.h>
      #endif
      
      #ifdef CONFIG_DEBUG_SPINLOCK
        extern void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name,
                                         struct lock_class_key *key, short inner);
      
      # define raw_spin_lock_init(lock)                                        \
      do {                                                                        \
              static struct lock_class_key __key;                                \
                                                                              \
              __raw_spin_lock_init((lock), #lock, &__key, LD_WAIT_SPIN);        \
      } while (0)
      
      #else
      # define raw_spin_lock_init(lock)                                \
              do { *(lock) = __RAW_SPIN_LOCK_UNLOCKED(lock); } while (0)
      #endif
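/*
 * Illustrative only ("my_lock" is hypothetical): a raw spinlock may be
 * initialised either statically or at run time:
 *
 *	static DEFINE_RAW_SPINLOCK(my_lock);
 *
 * or:
 *
 *	raw_spinlock_t my_lock;
 *
 *	raw_spin_lock_init(&my_lock);
 */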
      
      #define raw_spin_is_locked(lock)        arch_spin_is_locked(&(lock)->raw_lock)
      
      #ifdef arch_spin_is_contended
      #define raw_spin_is_contended(lock)        arch_spin_is_contended(&(lock)->raw_lock)
      #else
      #define raw_spin_is_contended(lock)        (((void)(lock), 0))
      #endif /*arch_spin_is_contended*/
      
      /*
       * smp_mb__after_spinlock() provides the equivalent of a full memory barrier
       * between program-order earlier lock acquisitions and program-order later
       * memory accesses.
       *
       * This guarantees that the following two properties hold:
       *
       *   1) Given the snippet:
       *
       *          { X = 0;  Y = 0; }
       *
 *          CPU0                          CPU1
 *
 *          WRITE_ONCE(X, 1);             WRITE_ONCE(Y, 1);
 *          spin_lock(S);                 smp_mb();
 *          smp_mb__after_spinlock();     r1 = READ_ONCE(X);
 *          r0 = READ_ONCE(Y);
 *          spin_unlock(S);
       *
       *      it is forbidden that CPU0 does not observe CPU1's store to Y (r0 = 0)
       *      and CPU1 does not observe CPU0's store to X (r1 = 0); see the comments
       *      preceding the call to smp_mb__after_spinlock() in __schedule() and in
       *      try_to_wake_up().
       *
       *   2) Given the snippet:
       *
       *  { X = 0;  Y = 0; }
       *
 *  CPU0                      CPU1                          CPU2
 *
 *  spin_lock(S);             spin_lock(S);                 r1 = READ_ONCE(Y);
 *  WRITE_ONCE(X, 1);         smp_mb__after_spinlock();     smp_rmb();
 *  spin_unlock(S);           r0 = READ_ONCE(X);            r2 = READ_ONCE(X);
 *                            WRITE_ONCE(Y, 1);
 *                            spin_unlock(S);
       *
       *      it is forbidden that CPU0's critical section executes before CPU1's
       *      critical section (r0 = 1), CPU2 observes CPU1's store to Y (r1 = 1)
       *      and CPU2 does not observe CPU0's store to X (r2 = 0); see the comments
       *      preceding the calls to smp_rmb() in try_to_wake_up() for similar
       *      snippets but "projected" onto two CPUs.
       *
       * Property (2) upgrades the lock to an RCsc lock.
       *
       * Since most load-store architectures implement ACQUIRE with an smp_mb() after
       * the LL/SC loop, they need no further barriers. Similarly all our TSO
       * architectures imply an smp_mb() for each atomic instruction and equally don't
       * need more.
       *
       * Architectures that can implement ACQUIRE better need to take care.
       */
      #ifndef smp_mb__after_spinlock
      #define smp_mb__after_spinlock()        do { } while (0)
      #endif
      
      #ifdef CONFIG_DEBUG_SPINLOCK
       extern void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock);
      #define do_raw_spin_lock_flags(lock, flags) do_raw_spin_lock(lock)
       extern int do_raw_spin_trylock(raw_spinlock_t *lock);
       extern void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock);
      #else
      static inline void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock)
      {
              __acquire(lock);
              arch_spin_lock(&lock->raw_lock);
              mmiowb_spin_lock();
      }
      
      #ifndef arch_spin_lock_flags
      #define arch_spin_lock_flags(lock, flags)        arch_spin_lock(lock)
      #endif
      
      static inline void
      do_raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long *flags) __acquires(lock)
      {
              __acquire(lock);
              arch_spin_lock_flags(&lock->raw_lock, *flags);
              mmiowb_spin_lock();
      }
      
      static inline int do_raw_spin_trylock(raw_spinlock_t *lock)
      {
              int ret = arch_spin_trylock(&(lock)->raw_lock);
      
              if (ret)
                      mmiowb_spin_lock();
      
              return ret;
      }
      
      static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
      {
              mmiowb_spin_unlock();
              arch_spin_unlock(&lock->raw_lock);
              __release(lock);
      }
      #endif
      
      /*
       * Define the various spin_lock methods.  Note we define these
       * regardless of whether CONFIG_SMP or CONFIG_PREEMPTION are set. The
       * various methods are defined as nops in the case they are not
       * required.
       */
      #define raw_spin_trylock(lock)        __cond_lock(lock, _raw_spin_trylock(lock))
      
      #define raw_spin_lock(lock)        _raw_spin_lock(lock)
      
      #ifdef CONFIG_DEBUG_LOCK_ALLOC
      # define raw_spin_lock_nested(lock, subclass) \
              _raw_spin_lock_nested(lock, subclass)
      
      # define raw_spin_lock_nest_lock(lock, nest_lock)                        \
               do {                                                                \
                       typecheck(struct lockdep_map *, &(nest_lock)->dep_map);\
                       _raw_spin_lock_nest_lock(lock, &(nest_lock)->dep_map);        \
               } while (0)
      #else
       /*
        * Always evaluate the 'subclass' argument so that the compiler does not
        * warn about set-but-not-used variables when building with
        * CONFIG_DEBUG_LOCK_ALLOC=n and with W=1.
        */
      # define raw_spin_lock_nested(lock, subclass)                \
              _raw_spin_lock(((void)(subclass), (lock)))
      # define raw_spin_lock_nest_lock(lock, nest_lock)        _raw_spin_lock(lock)
      #endif
      
      #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
      
      #define raw_spin_lock_irqsave(lock, flags)                        \
              do {                                                \
                      typecheck(unsigned long, flags);        \
                      flags = _raw_spin_lock_irqsave(lock);        \
              } while (0)
      
      #ifdef CONFIG_DEBUG_LOCK_ALLOC
      #define raw_spin_lock_irqsave_nested(lock, flags, subclass)                \
              do {                                                                \
                      typecheck(unsigned long, flags);                        \
                      flags = _raw_spin_lock_irqsave_nested(lock, subclass);        \
              } while (0)
      #else
      #define raw_spin_lock_irqsave_nested(lock, flags, subclass)                \
              do {                                                                \
                      typecheck(unsigned long, flags);                        \
                      flags = _raw_spin_lock_irqsave(lock);                        \
              } while (0)
      #endif
      
      #else
      
      #define raw_spin_lock_irqsave(lock, flags)                \
              do {                                                \
                      typecheck(unsigned long, flags);        \
                      _raw_spin_lock_irqsave(lock, flags);        \
              } while (0)
      
      #define raw_spin_lock_irqsave_nested(lock, flags, subclass)        \
              raw_spin_lock_irqsave(lock, flags)
      
      #endif
      
      #define raw_spin_lock_irq(lock)                _raw_spin_lock_irq(lock)
      #define raw_spin_lock_bh(lock)                _raw_spin_lock_bh(lock)
      #define raw_spin_unlock(lock)                _raw_spin_unlock(lock)
      #define raw_spin_unlock_irq(lock)        _raw_spin_unlock_irq(lock)
      
      #define raw_spin_unlock_irqrestore(lock, flags)                \
              do {                                                        \
                      typecheck(unsigned long, flags);                \
                      _raw_spin_unlock_irqrestore(lock, flags);        \
              } while (0)
      #define raw_spin_unlock_bh(lock)        _raw_spin_unlock_bh(lock)
      
      #define raw_spin_trylock_bh(lock) \
              __cond_lock(lock, _raw_spin_trylock_bh(lock))
      
      #define raw_spin_trylock_irq(lock) \
      ({ \
              local_irq_disable(); \
              raw_spin_trylock(lock) ? \
              1 : ({ local_irq_enable(); 0;  }); \
      })
      
      #define raw_spin_trylock_irqsave(lock, flags) \
      ({ \
              local_irq_save(flags); \
              raw_spin_trylock(lock) ? \
              1 : ({ local_irq_restore(flags); 0; }); \
      })
      
      /* Include rwlock functions */
      #include <linux/rwlock.h>
      
      /*
       * Pull the _spin_*()/_read_*()/_write_*() functions/declarations:
       */
      #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
      # include <linux/spinlock_api_smp.h>
      #else
      # include <linux/spinlock_api_up.h>
      #endif
      
      /*
       * Map the spin_lock functions to the raw variants for PREEMPT_RT=n
       */
      
      static __always_inline raw_spinlock_t *spinlock_check(spinlock_t *lock)
      {
               return &lock->rlock;
      }
      
      #ifdef CONFIG_DEBUG_SPINLOCK
      
      # define spin_lock_init(lock)                                        \
      do {                                                                \
              static struct lock_class_key __key;                        \
                                                                      \
              __raw_spin_lock_init(spinlock_check(lock),                \
                                   #lock, &__key, LD_WAIT_CONFIG);        \
      } while (0)
      
      #else
      
      # define spin_lock_init(_lock)                        \
      do {                                                \
              spinlock_check(_lock);                        \
              *(_lock) = __SPIN_LOCK_UNLOCKED(_lock);        \
      } while (0)
      
      #endif
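
       /*
        * Editorial sketch (not part of the original header): dynamic
        * initialization of an embedded spinlock_t. "struct example_dev" and
        * example_dev_setup() are hypothetical. Under CONFIG_DEBUG_SPINLOCK,
        * each spin_lock_init() expansion supplies its own static
        * lock_class_key, so every init site gets a distinct lock class.
        */
       struct example_dev {
               spinlock_t      lock;
               int             state;
       };

       static inline void example_dev_setup(struct example_dev *d)
       {
               spin_lock_init(&d->lock);
               d->state = 0;
       }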
      
      static __always_inline void spin_lock(spinlock_t *lock)
      {
               raw_spin_lock(&lock->rlock);
      }
      
      static __always_inline void spin_lock_bh(spinlock_t *lock)
      {
              raw_spin_lock_bh(&lock->rlock);
      }
      
      static __always_inline int spin_trylock(spinlock_t *lock)
      {
               return raw_spin_trylock(&lock->rlock);
      }
      
      #define spin_lock_nested(lock, subclass)                        \
      do {                                                                \
              raw_spin_lock_nested(spinlock_check(lock), subclass);        \
      } while (0)
      
      #define spin_lock_nest_lock(lock, nest_lock)                                \
      do {                                                                        \
              raw_spin_lock_nest_lock(spinlock_check(lock), nest_lock);        \
      } while (0)
      
      static __always_inline void spin_lock_irq(spinlock_t *lock)
      {
               raw_spin_lock_irq(&lock->rlock);
      }
      
      #define spin_lock_irqsave(lock, flags)                                \
      do {                                                                \
              raw_spin_lock_irqsave(spinlock_check(lock), flags);        \
      } while (0)
      
      #define spin_lock_irqsave_nested(lock, flags, subclass)                        \
      do {                                                                        \
              raw_spin_lock_irqsave_nested(spinlock_check(lock), flags, subclass); \
      } while (0)
      
      static __always_inline void spin_unlock(spinlock_t *lock)
      {
               raw_spin_unlock(&lock->rlock);
      }
      
      static __always_inline void spin_unlock_bh(spinlock_t *lock)
      {
              raw_spin_unlock_bh(&lock->rlock);
      }
      
      static __always_inline void spin_unlock_irq(spinlock_t *lock)
      {
               raw_spin_unlock_irq(&lock->rlock);
      }
      
      static __always_inline void spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
      {
               raw_spin_unlock_irqrestore(&lock->rlock, flags);
      }
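
       /*
        * Editorial sketch (not part of the original header): typical pairing of
        * the wrappers above. "struct example_stats" and both helpers are
        * hypothetical; the _irqsave variant is the form to use when the same
        * lock can also be taken from hard interrupt context.
        */
       struct example_stats {
               spinlock_t      lock;
               unsigned long   events;
       };

       static inline void example_stats_add(struct example_stats *s, unsigned long n)
       {
               spin_lock(&s->lock);
               s->events += n;
               spin_unlock(&s->lock);
       }

       static inline void example_stats_add_any_context(struct example_stats *s,
                                                        unsigned long n)
       {
               unsigned long flags;

               spin_lock_irqsave(&s->lock, flags);
               s->events += n;
               spin_unlock_irqrestore(&s->lock, flags);
       }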
      
      static __always_inline int spin_trylock_bh(spinlock_t *lock)
      {
              return raw_spin_trylock_bh(&lock->rlock);
      }
      
      static __always_inline int spin_trylock_irq(spinlock_t *lock)
      {
              return raw_spin_trylock_irq(&lock->rlock);
      }
      
      #define spin_trylock_irqsave(lock, flags)                        \
      ({                                                                \
              raw_spin_trylock_irqsave(spinlock_check(lock), flags); \
      })
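
       /*
        * Editorial sketch (not part of the original header): opportunistic
        * locking with spin_trylock(); back off instead of spinning when the
        * lock is contended. "struct example_queue" and the helper are
        * hypothetical.
        */
       struct example_queue {
               spinlock_t      lock;
               unsigned int    pending;
       };

       /* Returns 1 if the item was counted, 0 if the caller should retry later. */
       static inline int example_queue_try_account(struct example_queue *q)
       {
               if (!spin_trylock(&q->lock))
                       return 0;

               q->pending++;
               spin_unlock(&q->lock);
               return 1;
       }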
      
      /**
       * spin_is_locked() - Check whether a spinlock is locked.
       * @lock: Pointer to the spinlock.
       *
        * This function is NOT required to provide any memory ordering
        * guarantees; it can be used for debugging purposes or, when
        * additional synchronization is needed, accompanied by other
        * constructs (such as memory barriers) that enforce the
        * synchronization.
       *
       * Returns: 1 if @lock is locked, 0 otherwise.
       *
       * Note that the function only tells you that the spinlock is
       * seen to be locked, not that it is locked on your CPU.
       *
       * Further, on CONFIG_SMP=n builds with CONFIG_DEBUG_SPINLOCK=n,
       * the return value is always 0 (see include/linux/spinlock_up.h).
       * Therefore you should not rely heavily on the return value.
       */
      static __always_inline int spin_is_locked(spinlock_t *lock)
      {
              return raw_spin_is_locked(&lock->rlock);
      }
      
      static __always_inline int spin_is_contended(spinlock_t *lock)
      {
              return raw_spin_is_contended(&lock->rlock);
      }
      
      #define assert_spin_locked(lock)        assert_raw_spin_locked(&(lock)->rlock)
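
       /*
        * Editorial sketch (not part of the original header): given the caveats
        * documented for spin_is_locked() above, locked-ness checks are best
        * kept to assertions of a locking contract rather than used for
        * synchronization. The helper below is hypothetical.
        */
       static inline void example_set_value_locked(spinlock_t *lock, int *slot, int val)
       {
               assert_spin_locked(lock);       /* caller must hold @lock */
               *slot = val;
       }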
      
      /*
       * Pull the atomic_t declaration:
       * (asm-mips/atomic.h needs above definitions)
       */
      #include <linux/atomic.h>
      /**
       * atomic_dec_and_lock - lock on reaching reference count zero
       * @atomic: the atomic counter
       * @lock: the spinlock in question
       *
       * Decrements @atomic by 1.  If the result is 0, returns true and locks
       * @lock.  Returns false for all other cases.
       */
      extern int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock);
      #define atomic_dec_and_lock(atomic, lock) \
                      __cond_lock(lock, _atomic_dec_and_lock(atomic, lock))
      
      extern int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock,
                                              unsigned long *flags);
      #define atomic_dec_and_lock_irqsave(atomic, lock, flags) \
                      __cond_lock(lock, _atomic_dec_and_lock_irqsave(atomic, lock, &(flags)))
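
       /*
        * Editorial sketch (not part of the original header): the classic
        * reference-drop pattern for atomic_dec_and_lock(). The lock is taken
        * only when the count reaches zero, so the object can be unpublished
        * from its lookup structure before it is freed. All names below are
        * hypothetical.
        */
       struct example_obj {
               atomic_t        refcnt;
       };

       void example_obj_unpublish(struct example_obj *obj);    /* hypothetical */
       void example_obj_free(struct example_obj *obj);         /* hypothetical */

       static inline void example_obj_put(struct example_obj *obj,
                                          spinlock_t *registry_lock)
       {
               if (atomic_dec_and_lock(&obj->refcnt, registry_lock)) {
                       example_obj_unpublish(obj);     /* remove from the registry */
                       spin_unlock(registry_lock);
                       example_obj_free(obj);
               }
       }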
      
      int __alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *lock_mask,
                                   size_t max_size, unsigned int cpu_mult,
                                   gfp_t gfp, const char *name,
                                   struct lock_class_key *key);
      
      #define alloc_bucket_spinlocks(locks, lock_mask, max_size, cpu_mult, gfp)    \
              ({                                                                     \
                      static struct lock_class_key key;                             \
                      int ret;                                                     \
                                                                                   \
                      ret = __alloc_bucket_spinlocks(locks, lock_mask, max_size,   \
                                                     cpu_mult, gfp, #locks, &key); \
                      ret;                                                             \
              })
      
      void free_bucket_spinlocks(spinlock_t *locks);
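
       /*
        * Editorial sketch (not part of the original header): per-bucket locking
        * for a hash table built on alloc_bucket_spinlocks(). The structure,
        * sizes and helpers are hypothetical; lock_mask is filled in by the
        * allocator and is always a power of two minus one. Pair with
        * free_bucket_spinlocks(ht->bucket_locks) on teardown.
        */
       struct example_htable {
               spinlock_t      *bucket_locks;
               unsigned int    lock_mask;
       };

       static inline int example_htable_init_locks(struct example_htable *ht, gfp_t gfp)
       {
               /* At most 1024 locks, scaled as 4 per possible CPU. */
               return alloc_bucket_spinlocks(&ht->bucket_locks, &ht->lock_mask,
                                             1024, 4, gfp);
       }

       static inline spinlock_t *example_htable_bucket_lock(struct example_htable *ht,
                                                            unsigned int hash)
       {
               return &ht->bucket_locks[hash & ht->lock_mask];
       }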
      
      #endif /* __LINUX_SPINLOCK_H */
      /* SPDX-License-Identifier: GPL-2.0 */
      /*
       * Linux Socket Filter Data Structures
       */
      #ifndef __LINUX_FILTER_H__
      #define __LINUX_FILTER_H__
      
      #include <stdarg.h>
      
      #include <linux/atomic.h>
      #include <linux/refcount.h>
      #include <linux/compat.h>
      #include <linux/skbuff.h>
      #include <linux/linkage.h>
      #include <linux/printk.h>
      #include <linux/workqueue.h>
      #include <linux/sched.h>
      #include <linux/capability.h>
      #include <linux/set_memory.h>
      #include <linux/kallsyms.h>
      #include <linux/if_vlan.h>
      #include <linux/vmalloc.h>
      #include <linux/sockptr.h>
      #include <crypto/sha1.h>
      
      #include <net/sch_generic.h>
      
      #include <asm/byteorder.h>
      #include <uapi/linux/filter.h>
      #include <uapi/linux/bpf.h>
      
      struct sk_buff;
      struct sock;
      struct seccomp_data;
      struct bpf_prog_aux;
      struct xdp_rxq_info;
      struct xdp_buff;
      struct sock_reuseport;
      struct ctl_table;
      struct ctl_table_header;
      
       /* ArgX, context and stack frame pointer register positions. Note that
        * Arg1, Arg2, Arg3, etc. are used as the argument mappings of function
        * calls in the BPF_CALL instruction.
        */
      #define BPF_REG_ARG1        BPF_REG_1
      #define BPF_REG_ARG2        BPF_REG_2
      #define BPF_REG_ARG3        BPF_REG_3
      #define BPF_REG_ARG4        BPF_REG_4
      #define BPF_REG_ARG5        BPF_REG_5
      #define BPF_REG_CTX        BPF_REG_6
      #define BPF_REG_FP        BPF_REG_10
      
      /* Additional register mappings for converted user programs. */
      #define BPF_REG_A        BPF_REG_0
      #define BPF_REG_X        BPF_REG_7
      #define BPF_REG_TMP        BPF_REG_2        /* scratch reg */
      #define BPF_REG_D        BPF_REG_8        /* data, callee-saved */
      #define BPF_REG_H        BPF_REG_9        /* hlen, callee-saved */
      
      /* Kernel hidden auxiliary/helper register. */
      #define BPF_REG_AX                MAX_BPF_REG
      #define MAX_BPF_EXT_REG                (MAX_BPF_REG + 1)
      #define MAX_BPF_JIT_REG                MAX_BPF_EXT_REG
      
      /* unused opcode to mark special call to bpf_tail_call() helper */
      #define BPF_TAIL_CALL        0xf0
      
      /* unused opcode to mark special load instruction. Same as BPF_ABS */
      #define BPF_PROBE_MEM        0x20
      
      /* unused opcode to mark call to interpreter with arguments */
      #define BPF_CALL_ARGS        0xe0
      
       /* As per nm(1), we expose JITed images as a text (code) section for
        * kallsyms, so that tools like perf can find them and match
        * addresses.
        */
      #define BPF_SYM_ELF_TYPE        't'
      
      /* BPF program can access up to 512 bytes of stack space. */
      #define MAX_BPF_STACK        512
      
      /* Helper macros for filter block array initializers. */
      
      /* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
      
      #define BPF_ALU64_REG(OP, DST, SRC)                                \
              ((struct bpf_insn) {                                        \
                      .code  = BPF_ALU64 | BPF_OP(OP) | BPF_X,        \
                      .dst_reg = DST,                                        \
                      .src_reg = SRC,                                        \
                      .off   = 0,                                        \
                      .imm   = 0 })
      
      #define BPF_ALU32_REG(OP, DST, SRC)                                \
              ((struct bpf_insn) {                                        \
                      .code  = BPF_ALU | BPF_OP(OP) | BPF_X,                \
                      .dst_reg = DST,                                        \
                      .src_reg = SRC,                                        \
                      .off   = 0,                                        \
                      .imm   = 0 })
      
      /* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */
      
      #define BPF_ALU64_IMM(OP, DST, IMM)                                \
              ((struct bpf_insn) {                                        \
                      .code  = BPF_ALU64 | BPF_OP(OP) | BPF_K,        \
                      .dst_reg = DST,                                        \
                      .src_reg = 0,                                        \
                      .off   = 0,                                        \
                      .imm   = IMM })
      
      #define BPF_ALU32_IMM(OP, DST, IMM)                                \
              ((struct bpf_insn) {                                        \
                      .code  = BPF_ALU | BPF_OP(OP) | BPF_K,                \
                      .dst_reg = DST,                                        \
                      .src_reg = 0,                                        \
                      .off   = 0,                                        \
                      .imm   = IMM })
      
       /* Endianness conversion, cpu_to_{l,b}e(), {l,b}e_to_cpu() */
      
      #define BPF_ENDIAN(TYPE, DST, LEN)                                \
              ((struct bpf_insn) {                                        \
                      .code  = BPF_ALU | BPF_END | BPF_SRC(TYPE),        \
                      .dst_reg = DST,                                        \
                      .src_reg = 0,                                        \
                      .off   = 0,                                        \
                      .imm   = LEN })
      
      /* Short form of mov, dst_reg = src_reg */
      
      #define BPF_MOV64_REG(DST, SRC)                                        \
              ((struct bpf_insn) {                                        \
                      .code  = BPF_ALU64 | BPF_MOV | BPF_X,                \
                      .dst_reg = DST,                                        \
                      .src_reg = SRC,                                        \
                      .off   = 0,                                        \
                      .imm   = 0 })
      
      #define BPF_MOV32_REG(DST, SRC)                                        \
              ((struct bpf_insn) {                                        \
                      .code  = BPF_ALU | BPF_MOV | BPF_X,                \
                      .dst_reg = DST,                                        \
                      .src_reg = SRC,                                        \
                      .off   = 0,                                        \
                      .imm   = 0 })
      
      /* Short form of mov, dst_reg = imm32 */
      
      #define BPF_MOV64_IMM(DST, IMM)                                        \
              ((struct bpf_insn) {                                        \
                      .code  = BPF_ALU64 | BPF_MOV | BPF_K,                \
                      .dst_reg = DST,                                        \
                      .src_reg = 0,                                        \
                      .off   = 0,                                        \
                      .imm   = IMM })
      
      #define BPF_MOV32_IMM(DST, IMM)                                        \
              ((struct bpf_insn) {                                        \
                      .code  = BPF_ALU | BPF_MOV | BPF_K,                \
                      .dst_reg = DST,                                        \
                      .src_reg = 0,                                        \
                      .off   = 0,                                        \
                      .imm   = IMM })
      
      /* Special form of mov32, used for doing explicit zero extension on dst. */
      #define BPF_ZEXT_REG(DST)                                        \
              ((struct bpf_insn) {                                        \
                      .code  = BPF_ALU | BPF_MOV | BPF_X,                \
                      .dst_reg = DST,                                        \
                      .src_reg = DST,                                        \
                      .off   = 0,                                        \
                      .imm   = 1 })
      
       /* True if @insn is the explicit zero-extending mov32 emitted by BPF_ZEXT_REG(). */
       static inline bool insn_is_zext(const struct bpf_insn *insn)
       {
               return insn->code == (BPF_ALU | BPF_MOV | BPF_X) && insn->imm == 1;
       }
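
       /*
        * Editorial sketch (not part of the original header): emitting
        * instructions with the initializer macros above. The fragment computes
        * r0 = (r1 + 4) * 2; the array name is hypothetical and a complete
        * program would still need an exit instruction.
        */
       static const struct bpf_insn example_alu_fragment[] __maybe_unused = {
               BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),    /* r0 = r1 */
               BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 4),   /* r0 += 4 */
               BPF_ALU64_IMM(BPF_MUL, BPF_REG_0, 2),   /* r0 *= 2 */
       };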
      
       /* BPF_LD_IMM64 macro encodes a single 'load 64-bit immediate' insn */
      #define BPF_LD_IMM64(DST, IMM)                                        \
              BPF_LD_IMM64_RAW(DST, 0, IMM)
      
      #define BPF_LD_IMM64_RAW(DST, SRC, IMM)                                \
              ((struct bpf_insn) {                                        \
                      .code  = BPF_LD | BPF_DW | BPF_IMM,                \
                      .dst_reg = DST,                                        \
                      .src_reg = SRC,                                        \
                      .off   = 0,                                        \
                      .imm   = (__u32) (IMM) }),                        \
              ((struct bpf_insn) {                                        \
                      .code  = 0, /* zero is reserved opcode */        \
                      .dst_reg = 0,                                        \
                      .src_reg = 0,                                        \
                      .off   = 0,                                        \
                      .imm   = ((__u64) (IMM)) >> 32 })
      
      /* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */
      #define BPF_LD_MAP_FD(DST, MAP_FD)                                \
              BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD)
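
       /*
        * Editorial sketch (not part of the original header): BPF_LD_IMM64()
        * expands to two struct bpf_insn slots, with the upper 32 bits of the
        * immediate carried in the second (reserved-opcode) slot, so the
        * hypothetical array below ends up with two entries for one logical
        * instruction. BPF_LD_MAP_FD() uses the same encoding with
        * src_reg = BPF_PSEUDO_MAP_FD to mark the immediate as a map file
        * descriptor.
        */
       static const struct bpf_insn example_ld_imm64[] __maybe_unused = {
               BPF_LD_IMM64(BPF_REG_2, 0x1122334455667788ULL), /* r2 = 0x1122334455667788 */
       };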
      
      /* Short form of mov based on type, BPF_X: dst_reg = src_reg, BPF_K: dst_reg = imm32 */
      
      #define BPF_MOV64_RAW(TYPE, DST, SRC, IMM)                        \
              ((struct bpf_insn) {                                        \
                      .code  = BPF_ALU64 | BPF_MOV | BPF_SRC(TYPE),        \
                      .dst_reg = DST,                                        \
                      .src_reg = SRC,                                        \
                      .off   = 0,                                        \
                      .imm   = IMM })
      
      #define BPF_MOV32_RAW(TYPE, DST, SRC, IMM)                        \
              ((struct bpf_insn) {                                        \
                      .code  = BPF_ALU | BPF_MOV | BPF_SRC(TYPE),        \
                      .dst_reg = DST,                                        \
                      .src_reg = SRC,                                        \
                      .off   = 0,                                        \
                      .imm   = IMM })
      
      /* Direct packet access, R0 = *(uint *) (skb->data + imm32) */
      
      #define BPF_LD_ABS(SIZE, IMM)                                        \
              ((struct bpf_insn) {                                        \
                      .code  = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS,        \
                      .dst_reg = 0,                                        \
                      .src_reg = 0,                                        \
                      .off   = 0,                                        \
                      .imm   = IMM })
      
      /* Indirect packet access, R0 = *(uint *) (skb->data + src_reg + imm32) */
      
      #define BPF_LD_IND(SIZE, SRC, IMM)                                \
              ((struct bpf_insn) {                                        \
                      .code  = BPF_LD | BPF_SIZE(SIZE) | BPF_IND,        \
                      .dst_reg = 0,                                        \
                      .src_reg = SRC,                                        \
                      .off   = 0,                                        \
                      .imm   = IMM })
      
      /* Memory load, dst_reg = *(uint *) (src_reg + off16) */
      
      #define BPF_LDX_MEM(SIZE, DST, SRC, OFF)                        \
              ((struct bpf_insn) {                                        \
                      .code  = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM,        \
                      .dst_reg = DST,                                        \
                      .src_reg = SRC,                                        \
                      .off   = OFF,                                        \
                      .imm   = 0 })
      
      /* Memory store, *(uint *) (dst_reg + off16) = src_reg */
      
      #define BPF_STX_MEM(SIZE, DST, SRC, OFF)                        \
              ((struct bpf_insn) {                                        \
                      .code  = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM,        \
                      .dst_reg = DST,                                        \
                      .src_reg = SRC,                                        \
                      .off   = OFF,                                        \
                      .imm   = 0 })
      
      /* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */
      
      #define BPF_STX_XADD(SIZE, DST, SRC, OFF)                        \
              ((struct bpf_insn) {                                        \
                      .code  = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD,        \
                      .dst_reg = DST,                                        \
                      .src_reg = SRC,                                        \
                      .off   = OFF,                                        \
                      .imm   = 0 })
      
      /* Memory store, *(uint *) (dst_reg + off16) = imm32 */
      
      #define BPF_ST_MEM(SIZE, DST, OFF, IMM)                                \
              ((struct bpf_insn) {                                        \
                      .code  = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM,        \
                      .dst_reg = DST,                                        \
                      .src_reg = 0,                                        \
                      .off   = OFF,                                        \
                      .imm   = IMM })
      
      /* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */
      
      #define BPF_JMP_REG(OP, DST, SRC, OFF)                                \
              ((struct bpf_insn) {                                        \
                      .code  = BPF_JMP | BPF_OP(OP) | BPF_X,                \
                      .dst_reg = DST,                                        \
                      .src_reg = SRC,                                        \
                      .off   = OFF,                                        \
                      .imm   = 0 })
      
      /* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */
      
      #define BPF_JMP_IMM(OP, DST, IMM, OFF)                                \
              ((struct bpf_insn) {                                        \
                      .code  = BPF_JMP | BPF_OP(OP) | BPF_K,                \
                      .dst_reg = DST,                                        \
                      .src_reg = 0,                                        \
                      .off   = OFF,                                        \
                      .imm   = IMM })
      
      /* Like BPF_JMP_REG, but with 32-bit wide operands for comparison. */
      
      #define BPF_JMP32_REG(OP, DST, SRC, OFF)                        \
              ((struct bpf_insn) {                                        \
                      .code  = BPF_JMP32 | BPF_OP(OP) | BPF_X,        \
                      .dst_reg = DST,                                        \
                      .src_reg = SRC,                                        \
                      .off   = OFF,                                        \
                      .imm   = 0 })
      
      /* Like BPF_JMP_IMM, but with 32-bit wide operands for comparison. */
      
      #define BPF_JMP32_IMM(OP, DST, IMM, OFF)                        \
              ((struct bpf_insn) {                                        \
                      .code  = BPF_JMP32 | BPF_OP(OP) | BPF_K,        \
                      .dst_reg = DST,                                        \
                      .src_reg = 0,                                        \
                      .off   = OFF,                                        \
                      .imm   = IMM })
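
       /*
        * Editorial sketch (not part of the original header): a bounds check
        * built from the jump macros above. The jump offset counts instructions
        * to skip; the array name is hypothetical.
        */
       static const struct bpf_insn example_bounds_check[] __maybe_unused = {
               BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 63, 2),         /* if (r2 > 63) skip next 2 insns */
               BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),   /* r1 += r2 */
               BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),    /* r0 = *(u8 *)(r1 + 0) */
       };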
      
      /* Unconditional jumps, goto pc + off16 */
      
      #define BPF_JMP_A(OFF)                                                \
              ((struct bpf_insn) {                                        \
                      .code  = BPF_JMP | BPF_JA,                        \
                      .dst_reg = 0,                                        \
                      .src_reg = 0,                                        \