/* SPDX-License-Identifier: GPL-2.0 */
      /*
       * connection tracking event cache.
       */
      
      #ifndef _NF_CONNTRACK_ECACHE_H
      #define _NF_CONNTRACK_ECACHE_H
      #include <net/netfilter/nf_conntrack.h>
      
      #include <net/net_namespace.h>
      #include <net/netfilter/nf_conntrack_expect.h>
      #include <linux/netfilter/nf_conntrack_common.h>
      #include <linux/netfilter/nf_conntrack_tuple_common.h>
      #include <net/netfilter/nf_conntrack_extend.h>
      
      enum nf_ct_ecache_state {
              NFCT_ECACHE_UNKNOWN,                /* destroy event not sent */
              NFCT_ECACHE_DESTROY_FAIL,        /* tried but failed to send destroy event */
              NFCT_ECACHE_DESTROY_SENT,        /* sent destroy event after failure */
      };
      
      struct nf_conntrack_ecache {
              unsigned long cache;                /* bitops want long */
              u16 missed;                        /* missed events */
              u16 ctmask;                        /* bitmask of ct events to be delivered */
              u16 expmask;                        /* bitmask of expect events to be delivered */
              enum nf_ct_ecache_state state:8;/* ecache state */
              u32 portid;                        /* netlink portid of destroyer */
      };
      
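       /* Return the event cache extension of a conntrack entry, or NULL if the
        * entry has none (or events support is compiled out).
        */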
      static inline struct nf_conntrack_ecache *
      nf_ct_ecache_find(const struct nf_conn *ct)
      {
      #ifdef CONFIG_NF_CONNTRACK_EVENTS
               return nf_ct_ext_find(ct, NF_CT_EXT_ECACHE);
      #else
              return NULL;
      #endif
      }
      
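       /* Allocate the event cache extension for a new conntrack entry. With no
        * explicit masks, all events are requested if sysctl_events is enabled.
        */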
      static inline struct nf_conntrack_ecache *
      nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp)
      {
      #ifdef CONFIG_NF_CONNTRACK_EVENTS
              struct net *net = nf_ct_net(ct);
              struct nf_conntrack_ecache *e;
      
               if (!ctmask && !expmask && net->ct.sysctl_events) {
                      ctmask = ~0;
                      expmask = ~0;
              }
              if (!ctmask && !expmask)
                      return NULL;
      
               e = nf_ct_ext_add(ct, NF_CT_EXT_ECACHE, gfp);
               if (e) {
                       e->ctmask  = ctmask;
                      e->expmask = expmask;
              }
              return e;
      #else
              return NULL;
      #endif
      }
      
      #ifdef CONFIG_NF_CONNTRACK_EVENTS
      
      /* This structure is passed to event handler */
      struct nf_ct_event {
              struct nf_conn *ct;
              u32 portid;
              int report;
      };
      
      struct nf_ct_event_notifier {
              int (*fcn)(unsigned int events, struct nf_ct_event *item);
      };
      
      int nf_conntrack_register_notifier(struct net *net,
                                         struct nf_ct_event_notifier *nb);
      void nf_conntrack_unregister_notifier(struct net *net,
                                            struct nf_ct_event_notifier *nb);
      
      void nf_ct_deliver_cached_events(struct nf_conn *ct);
      int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct,
                                        u32 portid, int report);
      
      #else
      
      static inline void nf_ct_deliver_cached_events(const struct nf_conn *ct)
      {
      }
      
      static inline int nf_conntrack_eventmask_report(unsigned int eventmask,
                                                      struct nf_conn *ct,
                                                      u32 portid,
                                                      int report)
      {
              return 0;
      }
      
      #endif
      
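       /* Record an event bit in the per-conntrack cache if a notifier is
        * registered; cached events are delivered later by
        * nf_ct_deliver_cached_events().
        */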
      static inline void
       nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct)
      {
      #ifdef CONFIG_NF_CONNTRACK_EVENTS
               struct net *net = nf_ct_net(ct);
              struct nf_conntrack_ecache *e;
      
               if (!rcu_access_pointer(net->ct.nf_conntrack_event_cb))
                      return;
      
               e = nf_ct_ecache_find(ct);
               if (e == NULL)
                       return;

               set_bit(event, &e->cache);
      #endif
      }
      
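       /* Deliver a single event immediately, passing the requesting netlink
        * portid and report flag through to the notifier.
        */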
      static inline int
      nf_conntrack_event_report(enum ip_conntrack_events event, struct nf_conn *ct,
                                u32 portid, int report)
      {
      #ifdef CONFIG_NF_CONNTRACK_EVENTS
               const struct net *net = nf_ct_net(ct);
      
              if (!rcu_access_pointer(net->ct.nf_conntrack_event_cb))
                      return 0;
      
               return nf_conntrack_eventmask_report(1 << event, ct, portid, report);
      #else
              return 0;
      #endif
      }
      
      static inline int
      nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct)
      {
      #ifdef CONFIG_NF_CONNTRACK_EVENTS
              const struct net *net = nf_ct_net(ct);
      
              if (!rcu_access_pointer(net->ct.nf_conntrack_event_cb))
                      return 0;
      
              return nf_conntrack_eventmask_report(1 << event, ct, 0, 0);
      #else
              return 0;
      #endif
      }
      
      #ifdef CONFIG_NF_CONNTRACK_EVENTS
      
      struct nf_exp_event {
              struct nf_conntrack_expect *exp;
              u32 portid;
              int report;
      };
      
      struct nf_exp_event_notifier {
              int (*fcn)(unsigned int events, struct nf_exp_event *item);
      };
      
      int nf_ct_expect_register_notifier(struct net *net,
                                         struct nf_exp_event_notifier *nb);
      void nf_ct_expect_unregister_notifier(struct net *net,
                                            struct nf_exp_event_notifier *nb);
      
      void nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
                                     struct nf_conntrack_expect *exp,
                                     u32 portid, int report);
      
      void nf_conntrack_ecache_pernet_init(struct net *net);
      void nf_conntrack_ecache_pernet_fini(struct net *net);
      
      int nf_conntrack_ecache_init(void);
      void nf_conntrack_ecache_fini(void);
      
      #else /* CONFIG_NF_CONNTRACK_EVENTS */
      
      static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events e,
                                                   struct nf_conntrack_expect *exp,
                                                   u32 portid,
                                                   int report)
      {
      }
      
      static inline void nf_conntrack_ecache_pernet_init(struct net *net)
      {
      }
      
      static inline void nf_conntrack_ecache_pernet_fini(struct net *net)
      {
      }
      
      static inline int nf_conntrack_ecache_init(void)
      {
              return 0;
      }
      
      static inline void nf_conntrack_ecache_fini(void)
      {
      }
      
      #endif /* CONFIG_NF_CONNTRACK_EVENTS */
      
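       /* Schedule the per-netns event cache worker roughly one second from now,
        * unless a run is already pending.
        */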
      static inline void nf_conntrack_ecache_delayed_work(struct net *net)
      {
      #ifdef CONFIG_NF_CONNTRACK_EVENTS
              if (!delayed_work_pending(&net->ct.ecache_dwork)) {
                      schedule_delayed_work(&net->ct.ecache_dwork, HZ);
                      net->ct.ecache_dwork_pending = true;
              }
      #endif
      }
      
      static inline void nf_conntrack_ecache_work(struct net *net)
      {
      #ifdef CONFIG_NF_CONNTRACK_EVENTS
               if (net->ct.ecache_dwork_pending) {
                      net->ct.ecache_dwork_pending = false;
                      mod_delayed_work(system_wq, &net->ct.ecache_dwork, 0);
              }
      #endif
      }
      
      #endif /*_NF_CONNTRACK_ECACHE_H*/
      // SPDX-License-Identifier: GPL-2.0-only
      /*
       * Kernel Connection Multiplexor
       *
       * Copyright (c) 2016 Tom Herbert <tom@herbertland.com>
       */
      
      #include <linux/bpf.h>
      #include <linux/errno.h>
      #include <linux/errqueue.h>
      #include <linux/file.h>
      #include <linux/in.h>
      #include <linux/kernel.h>
      #include <linux/module.h>
      #include <linux/net.h>
      #include <linux/netdevice.h>
      #include <linux/poll.h>
      #include <linux/rculist.h>
      #include <linux/skbuff.h>
      #include <linux/socket.h>
      #include <linux/uaccess.h>
      #include <linux/workqueue.h>
      #include <linux/syscalls.h>
      #include <linux/sched/signal.h>
      
      #include <net/kcm.h>
      #include <net/netns/generic.h>
      #include <net/sock.h>
      #include <uapi/linux/kcm.h>
      
      unsigned int kcm_net_id;
      
      static struct kmem_cache *kcm_psockp __read_mostly;
      static struct kmem_cache *kcm_muxp __read_mostly;
      static struct workqueue_struct *kcm_wq;
      
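       /* struct kcm_sock embeds its struct sock as the first member, so a plain
        * cast converts between the two.
        */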
      static inline struct kcm_sock *kcm_sk(const struct sock *sk)
      {
              return (struct kcm_sock *)sk;
      }
      
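       /* Per-message transmit state is kept in the control buffer of the
        * message's head skb.
        */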
      static inline struct kcm_tx_msg *kcm_tx_msg(struct sk_buff *skb)
      {
               return (struct kcm_tx_msg *)skb->cb;
      }
      
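       /* Signal an unrecoverable error to the socket owner; the error recorded
        * on the socket is always EPIPE, regardless of err.
        */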
      static void report_csk_error(struct sock *csk, int err)
      {
              csk->sk_err = EPIPE;
               csk->sk_error_report(csk);
      }
      
      static void kcm_abort_tx_psock(struct kcm_psock *psock, int err,
                                     bool wakeup_kcm)
      {
               struct sock *csk = psock->sk;
              struct kcm_mux *mux = psock->mux;
      
              /* Unrecoverable error in transmit */
      
              spin_lock_bh(&mux->lock);
      
              if (psock->tx_stopped) {
                      spin_unlock_bh(&mux->lock);
                      return;
              }
      
               psock->tx_stopped = 1;
              KCM_STATS_INCR(psock->stats.tx_aborts);
      
              if (!psock->tx_kcm) {
                      /* Take off psocks_avail list */
                      list_del(&psock->psock_avail_list);
               } else if (wakeup_kcm) {
                      /* In this case psock is being aborted while outside of
                       * write_msgs and psock is reserved. Schedule tx_work
                       * to handle the failure there. Need to commit tx_stopped
                       * before queuing work.
                       */
                      smp_mb();
      
                      queue_work(kcm_wq, &psock->tx_kcm->tx_work);
              }
      
               spin_unlock_bh(&mux->lock);

               /* Report error on lower socket */
               report_csk_error(csk, err);
      }
      
      /* RX mux lock held. */
      static void kcm_update_rx_mux_stats(struct kcm_mux *mux,
                                          struct kcm_psock *psock)
      {
              STRP_STATS_ADD(mux->stats.rx_bytes,
                             psock->strp.stats.bytes -
                             psock->saved_rx_bytes);
              mux->stats.rx_msgs +=
                      psock->strp.stats.msgs - psock->saved_rx_msgs;
              psock->saved_rx_msgs = psock->strp.stats.msgs;
              psock->saved_rx_bytes = psock->strp.stats.bytes;
      }
      
      static void kcm_update_tx_mux_stats(struct kcm_mux *mux,
                                          struct kcm_psock *psock)
      {
              KCM_STATS_ADD(mux->stats.tx_bytes,
                            psock->stats.tx_bytes - psock->saved_tx_bytes);
              mux->stats.tx_msgs +=
                      psock->stats.tx_msgs - psock->saved_tx_msgs;
              psock->saved_tx_msgs = psock->stats.tx_msgs;
              psock->saved_tx_bytes = psock->stats.tx_bytes;
      }
      
      static int kcm_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
      
      /* KCM is ready to receive messages on its queue-- either the KCM is new or
       * has become unblocked after being blocked on full socket buffer. Queue any
       * pending ready messages on a psock. RX mux lock held.
       */
      static void kcm_rcv_ready(struct kcm_sock *kcm)
      {
               struct kcm_mux *mux = kcm->mux;
              struct kcm_psock *psock;
              struct sk_buff *skb;
      
               if (unlikely(kcm->rx_wait || kcm->rx_psock || kcm->rx_disabled))
                      return;
      
               while (unlikely((skb = __skb_dequeue(&mux->rx_hold_queue)))) {
                       if (kcm_queue_rcv_skb(&kcm->sk, skb)) {
                               /* Assuming buffer limit has been reached */
                               skb_queue_head(&mux->rx_hold_queue, skb);
                               WARN_ON(!sk_rmem_alloc_get(&kcm->sk));
                              return;
                      }
              }
      
               while (!list_empty(&mux->psocks_ready)) {
                      psock = list_first_entry(&mux->psocks_ready, struct kcm_psock,
                                               psock_ready_list);
      
                      if (kcm_queue_rcv_skb(&kcm->sk, psock->ready_rx_msg)) {
                              /* Assuming buffer limit has been reached */
                              WARN_ON(!sk_rmem_alloc_get(&kcm->sk));
                              return;
                      }
      
                      /* Consumed the ready message on the psock. Schedule rx_work to
                       * get more messages.
                       */
                      list_del(&psock->psock_ready_list);
                      psock->ready_rx_msg = NULL;
                      /* Commit clearing of ready_rx_msg for queuing work */
                      smp_mb();
      
                      strp_unpause(&psock->strp);
                      strp_check_rcv(&psock->strp);
              }
      
              /* Buffer limit is okay now, add to ready list */
               list_add_tail(&kcm->wait_rx_list,
                             &kcm->mux->kcm_rx_waiters);
               kcm->rx_wait = true;
      }
      
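       /* Receive skb destructor: uncharge the skb's memory and, if the socket
        * has drained below its receive low-water mark, mark the KCM ready to
        * receive again.
        */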
      static void kcm_rfree(struct sk_buff *skb)
      {
              struct sock *sk = skb->sk;
              struct kcm_sock *kcm = kcm_sk(sk);
              struct kcm_mux *mux = kcm->mux;
              unsigned int len = skb->truesize;
      
              sk_mem_uncharge(sk, len);
              atomic_sub(len, &sk->sk_rmem_alloc);
      
              /* For reading rx_wait and rx_psock without holding lock */
              smp_mb__after_atomic();
      
              if (!kcm->rx_wait && !kcm->rx_psock &&
                  sk_rmem_alloc_get(sk) < sk->sk_rcvlowat) {
                      spin_lock_bh(&mux->rx_lock);
                      kcm_rcv_ready(kcm);
                      spin_unlock_bh(&mux->rx_lock);
              }
      }
      
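       /* Queue a complete message on a KCM socket's receive queue, charging the
        * socket for its memory; fails if the receive buffer limit is hit.
        */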
      static int kcm_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
      {
              struct sk_buff_head *list = &sk->sk_receive_queue;
      
              if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
                      return -ENOMEM;
      
              if (!sk_rmem_schedule(sk, skb, skb->truesize))
                      return -ENOBUFS;
      
              skb->dev = NULL;
      
              skb_orphan(skb);
              skb->sk = sk;
              skb->destructor = kcm_rfree;
              atomic_add(skb->truesize, &sk->sk_rmem_alloc);
              sk_mem_charge(sk, skb->truesize);
      
              skb_queue_tail(list, skb);
      
              if (!sock_flag(sk, SOCK_DEAD))
                      sk->sk_data_ready(sk);
      
              return 0;
      }
      
      /* Requeue received messages for a kcm socket to other kcm sockets. This is
        * called when a kcm socket is receive disabled.
       * RX mux lock held.
       */
      static void requeue_rx_msgs(struct kcm_mux *mux, struct sk_buff_head *head)
      {
              struct sk_buff *skb;
              struct kcm_sock *kcm;
      
               while ((skb = __skb_dequeue(head))) {
                      /* Reset destructor to avoid calling kcm_rcv_ready */
                      skb->destructor = sock_rfree;
                      skb_orphan(skb);
      try_again:
                      if (list_empty(&mux->kcm_rx_waiters)) {
                              skb_queue_tail(&mux->rx_hold_queue, skb);
                              continue;
                      }
      
                      kcm = list_first_entry(&mux->kcm_rx_waiters,
                                             struct kcm_sock, wait_rx_list);
      
                      if (kcm_queue_rcv_skb(&kcm->sk, skb)) {
                              /* Should mean socket buffer full */
                              list_del(&kcm->wait_rx_list);
                              kcm->rx_wait = false;
      
                              /* Commit rx_wait to read in kcm_free */
                              smp_wmb();
      
                              goto try_again;
                      }
              }
       }
      
      /* Lower sock lock held */
      static struct kcm_sock *reserve_rx_kcm(struct kcm_psock *psock,
                                             struct sk_buff *head)
      {
              struct kcm_mux *mux = psock->mux;
              struct kcm_sock *kcm;
      
              WARN_ON(psock->ready_rx_msg);
      
              if (psock->rx_kcm)
                      return psock->rx_kcm;
      
              spin_lock_bh(&mux->rx_lock);
      
              if (psock->rx_kcm) {
                      spin_unlock_bh(&mux->rx_lock);
                      return psock->rx_kcm;
              }
      
              kcm_update_rx_mux_stats(mux, psock);
      
              if (list_empty(&mux->kcm_rx_waiters)) {
                      psock->ready_rx_msg = head;
                      strp_pause(&psock->strp);
                      list_add_tail(&psock->psock_ready_list,
                                    &mux->psocks_ready);
                      spin_unlock_bh(&mux->rx_lock);
                      return NULL;
              }
      
              kcm = list_first_entry(&mux->kcm_rx_waiters,
                                     struct kcm_sock, wait_rx_list);
              list_del(&kcm->wait_rx_list);
              kcm->rx_wait = false;
      
              psock->rx_kcm = kcm;
              kcm->rx_psock = psock;
      
              spin_unlock_bh(&mux->rx_lock);
      
              return kcm;
      }
      
      static void kcm_done(struct kcm_sock *kcm);
      
      static void kcm_done_work(struct work_struct *w)
      {
              kcm_done(container_of(w, struct kcm_sock, done_work));
      }
      
      /* Lower sock held */
      static void unreserve_rx_kcm(struct kcm_psock *psock,
                                   bool rcv_ready)
      {
              struct kcm_sock *kcm = psock->rx_kcm;
              struct kcm_mux *mux = psock->mux;
      
              if (!kcm)
                      return;
      
              spin_lock_bh(&mux->rx_lock);
      
              psock->rx_kcm = NULL;
              kcm->rx_psock = NULL;
      
              /* Commit kcm->rx_psock before sk_rmem_alloc_get to sync with
               * kcm_rfree
               */
              smp_mb();
      
              if (unlikely(kcm->done)) {
                      spin_unlock_bh(&mux->rx_lock);
      
                       /* Need to run kcm_done in a task since we need to acquire
                       * callback locks which may already be held here.
                       */
                      INIT_WORK(&kcm->done_work, kcm_done_work);
                      schedule_work(&kcm->done_work);
                      return;
              }
      
              if (unlikely(kcm->rx_disabled)) {
                      requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue);
              } else if (rcv_ready || unlikely(!sk_rmem_alloc_get(&kcm->sk))) {
                      /* Check for degenerative race with rx_wait that all
                       * data was dequeued (accounted for in kcm_rfree).
                       */
                      kcm_rcv_ready(kcm);
              }
              spin_unlock_bh(&mux->rx_lock);
      }
      
      /* Lower sock lock held */
      static void psock_data_ready(struct sock *sk)
      {
              struct kcm_psock *psock;
      
              read_lock_bh(&sk->sk_callback_lock);
      
              psock = (struct kcm_psock *)sk->sk_user_data;
              if (likely(psock))
                      strp_data_ready(&psock->strp);
      
              read_unlock_bh(&sk->sk_callback_lock);
      }
      
      /* Called with lower sock held */
      static void kcm_rcv_strparser(struct strparser *strp, struct sk_buff *skb)
      {
              struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp);
              struct kcm_sock *kcm;
      
      try_queue:
              kcm = reserve_rx_kcm(psock, skb);
              if (!kcm) {
                       /* Unable to reserve a KCM, message is held in psock and strp
                        * is paused.
                        */
                      return;
              }
      
              if (kcm_queue_rcv_skb(&kcm->sk, skb)) {
                      /* Should mean socket buffer full */
                      unreserve_rx_kcm(psock, false);
                      goto try_queue;
              }
      }
      
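       /* strparser parse callback: run the attached BPF program to determine
        * the length of the next message on the lower socket.
        */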
      static int kcm_parse_func_strparser(struct strparser *strp, struct sk_buff *skb)
      {
              struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp);
              struct bpf_prog *prog = psock->bpf_prog;
              int res;
      
              preempt_disable();
              res = BPF_PROG_RUN(prog, skb);
              preempt_enable();
              return res;
      }
      
      static int kcm_read_sock_done(struct strparser *strp, int err)
      {
              struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp);
      
              unreserve_rx_kcm(psock, true);
      
              return err;
      }
      
      static void psock_state_change(struct sock *sk)
      {
               /* TCP only does an EPOLLIN for a half close. Do an EPOLLHUP here
               * since application will normally not poll with EPOLLIN
               * on the TCP sockets.
               */
      
              report_csk_error(sk, EPIPE);
      }
      
      static void psock_write_space(struct sock *sk)
      {
              struct kcm_psock *psock;
              struct kcm_mux *mux;
              struct kcm_sock *kcm;
      
              read_lock_bh(&sk->sk_callback_lock);
      
              psock = (struct kcm_psock *)sk->sk_user_data;
              if (unlikely(!psock))
                      goto out;
              mux = psock->mux;
      
              spin_lock_bh(&mux->lock);
      
               /* Check if the socket is reserved; if so, someone is waiting to send. */
              kcm = psock->tx_kcm;
              if (kcm && !unlikely(kcm->tx_stopped))
                      queue_work(kcm_wq, &kcm->tx_work);
      
              spin_unlock_bh(&mux->lock);
      out:
              read_unlock_bh(&sk->sk_callback_lock);
      }
      
      static void unreserve_psock(struct kcm_sock *kcm);
      
      /* kcm sock is locked. */
      static struct kcm_psock *reserve_psock(struct kcm_sock *kcm)
      {
               struct kcm_mux *mux = kcm->mux;
              struct kcm_psock *psock;
      
              psock = kcm->tx_psock;
      
              smp_rmb(); /* Must read tx_psock before tx_wait */
      
              if (psock) {
                      WARN_ON(kcm->tx_wait);
                      if (unlikely(psock->tx_stopped))
                              unreserve_psock(kcm);
                      else
                              return kcm->tx_psock;
              }
      
               spin_lock_bh(&mux->lock);
      
               /* Check again under lock to see if a psock was reserved for this
                * kcm via psock_now_avail.
               */
              psock = kcm->tx_psock;
              if (unlikely(psock)) {
                      WARN_ON(kcm->tx_wait);
                      spin_unlock_bh(&mux->lock);
                      return kcm->tx_psock;
              }
      
               if (!list_empty(&mux->psocks_avail)) {
                       psock = list_first_entry(&mux->psocks_avail,
                                                struct kcm_psock,
                                                psock_avail_list);
                       list_del(&psock->psock_avail_list);
                       if (kcm->tx_wait) {
                               list_del(&kcm->wait_psock_list);
                               kcm->tx_wait = false;
                       }
                       kcm->tx_psock = psock;
                       psock->tx_kcm = kcm;
                       KCM_STATS_INCR(psock->stats.reserved);
               } else if (!kcm->tx_wait) {
                       list_add_tail(&kcm->wait_psock_list,
                                     &mux->kcm_tx_waiters);
                       kcm->tx_wait = true;
              }
      
               spin_unlock_bh(&mux->lock);
      
              return psock;
      }
      
      /* mux lock held */
      static void psock_now_avail(struct kcm_psock *psock)
      {
               struct kcm_mux *mux = psock->mux;
               struct kcm_sock *kcm;

               if (list_empty(&mux->kcm_tx_waiters)) {
                       list_add_tail(&psock->psock_avail_list,
                                    &mux->psocks_avail);
              } else {
                      kcm = list_first_entry(&mux->kcm_tx_waiters,
                                             struct kcm_sock,
                                             wait_psock_list);
                      list_del(&kcm->wait_psock_list);
                      kcm->tx_wait = false;
                      psock->tx_kcm = kcm;
      
                      /* Commit before changing tx_psock since that is read in
                       * reserve_psock before queuing work.
                       */
                      smp_mb();
      
                      kcm->tx_psock = psock;
                      KCM_STATS_INCR(psock->stats.reserved);
                      queue_work(kcm_wq, &kcm->tx_work);
              }
       }
      
      /* kcm sock is locked. */
      static void unreserve_psock(struct kcm_sock *kcm)
      {
              struct kcm_psock *psock;
               struct kcm_mux *mux = kcm->mux;
      
              spin_lock_bh(&mux->lock);
      
              psock = kcm->tx_psock;
      
              if (WARN_ON(!psock)) {
                      spin_unlock_bh(&mux->lock);
                      return;
              }
      
               smp_rmb(); /* Read tx_psock before tx_wait */
      
              kcm_update_tx_mux_stats(mux, psock);
      
              WARN_ON(kcm->tx_wait);
      
               kcm->tx_psock = NULL;
              psock->tx_kcm = NULL;
              KCM_STATS_INCR(psock->stats.unreserved);
      
              if (unlikely(psock->tx_stopped)) {
                       if (psock->done) {
                              /* Deferred free */
                              list_del(&psock->psock_list);
                              mux->psocks_cnt--;
                              sock_put(psock->sk);
                              fput(psock->sk->sk_socket->file);
                              kmem_cache_free(kcm_psockp, psock);
                      }
      
                      /* Don't put back on available list */
      
                       spin_unlock_bh(&mux->lock);
      
                      return;
              }
      
              psock_now_avail(psock);
      
               spin_unlock_bh(&mux->lock);
      }
      
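       /* Account a transmit retry in the mux statistics. */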
      static void kcm_report_tx_retry(struct kcm_sock *kcm)
      {
              struct kcm_mux *mux = kcm->mux;
      
              spin_lock_bh(&mux->lock);
              KCM_STATS_INCR(mux->stats.tx_retries);
              spin_unlock_bh(&mux->lock);
      }
      
      /* Write any messages ready on the kcm socket.  Called with kcm sock lock
       * held.  Return bytes actually sent or error.
       */
      static int kcm_write_msgs(struct kcm_sock *kcm)
      {
               struct sock *sk = &kcm->sk;
              struct kcm_psock *psock;
              struct sk_buff *skb, *head;
              struct kcm_tx_msg *txm;
              unsigned short fragidx, frag_offset;
              unsigned int sent, total_sent = 0;
              int ret = 0;
      
              kcm->tx_wait_more = false;
              psock = kcm->tx_psock;
              if (unlikely(psock && psock->tx_stopped)) {
                      /* A reserved psock was aborted asynchronously. Unreserve
                       * it and we'll retry the message.
                       */
                      unreserve_psock(kcm);
                      kcm_report_tx_retry(kcm);
                      if (skb_queue_empty(&sk->sk_write_queue))
                              return 0;
      
                      kcm_tx_msg(skb_peek(&sk->sk_write_queue))->sent = 0;
      
               } else if (skb_queue_empty(&sk->sk_write_queue)) {
                      return 0;
              }
      
              head = skb_peek(&sk->sk_write_queue);
              txm = kcm_tx_msg(head);
      
               if (txm->sent) {
                      /* Send of first skbuff in queue already in progress */
                      if (WARN_ON(!psock)) {
                              ret = -EINVAL;
                              goto out;
                      }
                      sent = txm->sent;
                      frag_offset = txm->frag_offset;
                      fragidx = txm->fragidx;
                      skb = txm->frag_skb;
      
                      goto do_frag;
              }
      
      try_again:
               psock = reserve_psock(kcm);
               if (!psock)
                      goto out;
      
              do {
                      skb = head;
                       txm = kcm_tx_msg(head);
                       sent = 0;

       do_frag_list:
                       if (WARN_ON(!skb_shinfo(skb)->nr_frags)) {
                              ret = -EINVAL;
                              goto out;
                      }
      
                       for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags;
                            fragidx++) {
                               skb_frag_t *frag;

                               frag_offset = 0;
       do_frag:
                               frag = &skb_shinfo(skb)->frags[fragidx];
                              if (WARN_ON(!skb_frag_size(frag))) {
                                      ret = -EINVAL;
                                      goto out;
                              }
      
                              ret = kernel_sendpage(psock->sk->sk_socket,
                                                    skb_frag_page(frag),
                                                    skb_frag_off(frag) + frag_offset,
                                                     skb_frag_size(frag) - frag_offset,
                                                     MSG_DONTWAIT);
                               if (ret <= 0) {
                                       if (ret == -EAGAIN) {
                                               /* Save state to try again when there's
                                                * write space on the socket
                                                */
                                               txm->sent = sent;
                                              txm->frag_offset = frag_offset;
                                              txm->fragidx = fragidx;
                                              txm->frag_skb = skb;
      
                                              ret = 0;
                                              goto out;
                                      }
      
                                      /* Hard failure in sending message, abort this
                                       * psock since it has lost framing
                                        * synchronization and retry sending the
                                       * message from the beginning.
                                       */
                                      kcm_abort_tx_psock(psock, ret ? -ret : EPIPE,
                                                         true);
                                      unreserve_psock(kcm);
      
                                      txm->sent = 0;
                                      kcm_report_tx_retry(kcm);
                                      ret = 0;
      
                                      goto try_again;
                              }
      
                              sent += ret;
                              frag_offset += ret;
                              KCM_STATS_ADD(psock->stats.tx_bytes, ret);
                              if (frag_offset < skb_frag_size(frag)) {
                                      /* Not finished with this frag */
                                      goto do_frag;
                              }
                      }
      
                      if (skb == head) {
                              if (skb_has_frag_list(skb)) {
                                      skb = skb_shinfo(skb)->frag_list;
                                      goto do_frag_list;
                              }
                      } else if (skb->next) {
                              skb = skb->next;
                              goto do_frag_list;
                      }
      
                      /* Successfully sent the whole packet, account for it. */
                      skb_dequeue(&sk->sk_write_queue);
                      kfree_skb(head);
                      sk->sk_wmem_queued -= sent;
                      total_sent += sent;
                      KCM_STATS_INCR(psock->stats.tx_msgs);
              } while ((head = skb_peek(&sk->sk_write_queue)));
      out:
               if (!head) {
                       /* Done with all queued messages. */
                       WARN_ON(!skb_queue_empty(&sk->sk_write_queue));
                       unreserve_psock(kcm);
               }

               /* Check if write space is available */
               sk->sk_write_space(sk);

               return total_sent ? : ret;
      }
      
      static void kcm_tx_work(struct work_struct *w)
      {
              struct kcm_sock *kcm = container_of(w, struct kcm_sock, tx_work);
              struct sock *sk = &kcm->sk;
              int err;
      
              lock_sock(sk);
      
              /* Primarily for SOCK_DGRAM sockets, also handle asynchronous tx
               * aborts
               */
              err = kcm_write_msgs(kcm);
              if (err < 0) {
                      /* Hard failure in write, report error on KCM socket */
                      pr_warn("KCM: Hard failure on kcm_write_msgs %d\n", err);
                      report_csk_error(&kcm->sk, -err);
                      goto out;
              }
      
              /* Primarily for SOCK_SEQPACKET sockets */
              if (likely(sk->sk_socket) &&
                  test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
                      clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
                      sk->sk_write_space(sk);
              }
      
      out:
              release_sock(sk);
      }
      
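       /* Flush any messages that were held back for batching (tx_wait_more). */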
      static void kcm_push(struct kcm_sock *kcm)
      {
               if (kcm->tx_wait_more)
                       kcm_write_msgs(kcm);
      }
      
      static ssize_t kcm_sendpage(struct socket *sock, struct page *page,
                                  int offset, size_t size, int flags)
      
      {
              struct sock *sk = sock->sk;
              struct kcm_sock *kcm = kcm_sk(sk);
              struct sk_buff *skb = NULL, *head = NULL;
              long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
              bool eor;
              int err = 0;
              int i;
      
              if (flags & MSG_SENDPAGE_NOTLAST)
                      flags |= MSG_MORE;
      
              /* No MSG_EOR from splice, only look at MSG_MORE */
              eor = !(flags & MSG_MORE);
      
              lock_sock(sk);
      
              sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
      
              err = -EPIPE;
              if (sk->sk_err)
                      goto out_error;
      
              if (kcm->seq_skb) {
                      /* Previously opened message */
                      head = kcm->seq_skb;
                      skb = kcm_tx_msg(head)->last_skb;
                      i = skb_shinfo(skb)->nr_frags;
      
                      if (skb_can_coalesce(skb, i, page, offset)) {
                              skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], size);
                              skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
                              goto coalesced;
                      }
      
                      if (i >= MAX_SKB_FRAGS) {
                              struct sk_buff *tskb;
      
                              tskb = alloc_skb(0, sk->sk_allocation);
                              while (!tskb) {
                                      kcm_push(kcm);
                                      err = sk_stream_wait_memory(sk, &timeo);
                                      if (err)
                                              goto out_error;
                              }
      
                              if (head == skb)
                                      skb_shinfo(head)->frag_list = tskb;
                              else
                                      skb->next = tskb;
      
                              skb = tskb;
                              skb->ip_summed = CHECKSUM_UNNECESSARY;
                              i = 0;
                      }
              } else {
                      /* Call the sk_stream functions to manage the sndbuf mem. */
                      if (!sk_stream_memory_free(sk)) {
                              kcm_push(kcm);
                              set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
                              err = sk_stream_wait_memory(sk, &timeo);
                              if (err)
                                      goto out_error;
                      }
      
                      head = alloc_skb(0, sk->sk_allocation);
                      while (!head) {
                              kcm_push(kcm);
                              err = sk_stream_wait_memory(sk, &timeo);
                              if (err)
                                      goto out_error;
                      }
      
                      skb = head;
                      i = 0;
              }
      
              get_page(page);
              skb_fill_page_desc(skb, i, page, offset, size);
              skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
      
      coalesced:
              skb->len += size;
              skb->data_len += size;
              skb->truesize += size;
              sk->sk_wmem_queued += size;
              sk_mem_charge(sk, size);
      
              if (head != skb) {
                      head->len += size;
                      head->data_len += size;
                      head->truesize += size;
              }
      
              if (eor) {
                      bool not_busy = skb_queue_empty(&sk->sk_write_queue);
      
                      /* Message complete, queue it on send buffer */
                      __skb_queue_tail(&sk->sk_write_queue, head);
                      kcm->seq_skb = NULL;
                      KCM_STATS_INCR(kcm->stats.tx_msgs);
      
                      if (flags & MSG_BATCH) {
                              kcm->tx_wait_more = true;
                      } else if (kcm->tx_wait_more || not_busy) {
                              err = kcm_write_msgs(kcm);
                              if (err < 0) {
                                      /* We got a hard error in write_msgs but have
                                       * already queued this message. Report an error
                                       * in the socket, but don't affect return value
                                       * from sendmsg
                                       */
                                      pr_warn("KCM: Hard failure on kcm_write_msgs\n");
                                      report_csk_error(&kcm->sk, -err);
                              }
                      }
              } else {
                      /* Message not complete, save state */
                      kcm->seq_skb = head;
                      kcm_tx_msg(head)->last_skb = skb;
              }
      
              KCM_STATS_ADD(kcm->stats.tx_bytes, size);
      
              release_sock(sk);
              return size;
      
      out_error:
              kcm_push(kcm);
      
              err = sk_stream_error(sk, flags, err);
      
              /* make sure we wake any epoll edge trigger waiter */
              if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
                      sk->sk_write_space(sk);
      
              release_sock(sk);
              return err;
      }
      
      static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
      {
               struct sock *sk = sock->sk;
              struct kcm_sock *kcm = kcm_sk(sk);
              struct sk_buff *skb = NULL, *head = NULL;
              size_t copy, copied = 0;
               long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
               int eor = (sock->type == SOCK_DGRAM) ?
                         !(msg->msg_flags & MSG_MORE) : !!(msg->msg_flags & MSG_EOR);
              int err = -EPIPE;
      
               lock_sock(sk);

               /* Per tcp_sendmsg this should be in poll */
               sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);

               if (sk->sk_err)
                       goto out_error;

               if (kcm->seq_skb) {
                       /* Previously opened message */
                       head = kcm->seq_skb;
                       skb = kcm_tx_msg(head)->last_skb;
                      goto start;
              }
      
              /* Call the sk_stream functions to manage the sndbuf mem. */
               if (!sk_stream_memory_free(sk)) {
                       kcm_push(kcm);
                       set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
                      err = sk_stream_wait_memory(sk, &timeo);
                      if (err)
                              goto out_error;
              }
      
               if (msg_data_left(msg)) {
                       /* New message, alloc head skb */
                       head = alloc_skb(0, sk->sk_allocation);
                      while (!head) {
                              kcm_push(kcm);
                              err = sk_stream_wait_memory(sk, &timeo);
                              if (err)
                                      goto out_error;
      
                              head = alloc_skb(0, sk->sk_allocation);
                      }
      
                      skb = head;
      
                      /* Set ip_summed to CHECKSUM_UNNECESSARY to avoid calling
                       * csum_and_copy_from_iter from skb_do_copy_data_nocache.
                       */
                       skb->ip_summed = CHECKSUM_UNNECESSARY;
              }
      
      start:
               while (msg_data_left(msg)) {
                       bool merge = true;
                       int i = skb_shinfo(skb)->nr_frags;
                       struct page_frag *pfrag = sk_page_frag(sk);

                       if (!sk_page_frag_refill(sk, pfrag))
                               goto wait_for_memory;

                       if (!skb_can_coalesce(skb, i, pfrag->page,
                                             pfrag->offset)) {
                               if (i == MAX_SKB_FRAGS) {
                                      struct sk_buff *tskb;
      
                                       tskb = alloc_skb(0, sk->sk_allocation);
                                       if (!tskb)
                                               goto wait_for_memory;

                                       if (head == skb)
                                               skb_shinfo(head)->frag_list = tskb;
                                       else
                                               skb->next = tskb;

                                       skb = tskb;
                                       skb->ip_summed = CHECKSUM_UNNECESSARY;
                                      continue;
                              }
                              merge = false;
                      }
      
                       copy = min_t(int, msg_data_left(msg),
                                    pfrag->size - pfrag->offset);

                       if (!sk_wmem_schedule(sk, copy))
                               goto wait_for_memory;

                       err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb,
                                                     pfrag->page,
                                                     pfrag->offset,
                                                     copy);
                      if (err)
                              goto out_error;
      
                      /* Update the skb. */
                       if (merge) {
                               skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
                       } else {
                               skb_fill_page_desc(skb, i, pfrag->page,
                                                  pfrag->offset, copy);
                               get_page(pfrag->page);
                       }

                       pfrag->offset += copy;
                       copied += copy;
                       if (head != skb) {
                               head->len += copy;
                              head->data_len += copy;
                      }
      
                      continue;
      
      wait_for_memory:
                      kcm_push(kcm);
                      err = sk_stream_wait_memory(sk, &timeo);
                      if (err)
                              goto out_error;
              }
      
               if (eor) {
                       bool not_busy = skb_queue_empty(&sk->sk_write_queue);

                       if (head) {
                               /* Message complete, queue it on send buffer */
                               __skb_queue_tail(&sk->sk_write_queue, head);
                              kcm->seq_skb = NULL;
                              KCM_STATS_INCR(kcm->stats.tx_msgs);
                      }
      
                       if (msg->msg_flags & MSG_BATCH) {
                               kcm->tx_wait_more = true;
                       } else if (kcm->tx_wait_more || not_busy) {
                               err = kcm_write_msgs(kcm);
                              if (err < 0) {
                                      /* We got a hard error in write_msgs but have
                                       * already queued this message. Report an error
                                       * in the socket, but don't affect return value
                                       * from sendmsg
                                       */
                                      pr_warn("KCM: Hard failure on kcm_write_msgs\n");
                                      report_csk_error(&kcm->sk, -err);
                              }
                      }
              } else {
                      /* Message not complete, save state */
      partial_message:
                       if (head) {
                               kcm->seq_skb = head;
                               kcm_tx_msg(head)->last_skb = skb;
                       }
               }

               KCM_STATS_ADD(kcm->stats.tx_bytes, copied);
      
              release_sock(sk);
              return copied;
      
      out_error:
               kcm_push(kcm);

               if (copied && sock->type == SOCK_SEQPACKET) {
                      /* Wrote some bytes before encountering an
                       * error, return partial success.
                       */
                      goto partial_message;
              }
      
               if (head != kcm->seq_skb)
                       kfree_skb(head);

               err = sk_stream_error(sk, msg->msg_flags, err);

               /* make sure we wake any epoll edge trigger waiter */
               if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
                       sk->sk_write_space(sk);

               release_sock(sk);
               return err;
      }
      
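       /* Wait for a complete message on the receive queue, honouring
        * MSG_DONTWAIT, the receive timeout and pending signals.
        */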
      static struct sk_buff *kcm_wait_data(struct sock *sk, int flags,
                                           long timeo, int *err)
      {
              struct sk_buff *skb;
      
               while (!(skb = skb_peek(&sk->sk_receive_queue))) {
                       if (sk->sk_err) {
                              *err = sock_error(sk);
                              return NULL;
                      }
      
                       if (sock_flag(sk, SOCK_DONE))
                               return NULL;

                       if ((flags & MSG_DONTWAIT) || !timeo) {
                               *err = -EAGAIN;
                              return NULL;
                      }
      
                       sk_wait_data(sk, &timeo, NULL);

                       /* Handle signals */
                       if (signal_pending(current)) {
                               *err = sock_intr_errno(timeo);
                               return NULL;
                      }
              }
      
              return skb;
      }
      
      static int kcm_recvmsg(struct socket *sock, struct msghdr *msg,
                             size_t len, int flags)
      {
               struct sock *sk = sock->sk;
              struct kcm_sock *kcm = kcm_sk(sk);
              int err = 0;
              long timeo;
              struct strp_msg *stm;
              int copied = 0;
              struct sk_buff *skb;
      
               timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

               lock_sock(sk);
      
              skb = kcm_wait_data(sk, flags, timeo, &err);
              if (!skb)
                      goto out;
      
              /* Okay, have a message on the receive queue */
      
              stm = strp_msg(skb);
      
              if (len > stm->full_len)
                      len = stm->full_len;
      
              err = skb_copy_datagram_msg(skb, stm->offset, msg, len);
              if (err < 0)
                      goto out;
      
              copied = len;
              if (likely(!(flags & MSG_PEEK))) {
                      KCM_STATS_ADD(kcm->stats.rx_bytes, copied);
                      if (copied < stm->full_len) {
                              if (sock->type == SOCK_DGRAM) {
                                      /* Truncated message */
                                      msg->msg_flags |= MSG_TRUNC;
                                      goto msg_finished;
                              }
                              stm->offset += copied;
                              stm->full_len -= copied;
                      } else {
      msg_finished:
                              /* Finished with message */
                              msg->msg_flags |= MSG_EOR;
                              KCM_STATS_INCR(kcm->stats.rx_msgs);
                              skb_unlink(skb, &sk->sk_receive_queue);
                              kfree_skb(skb);
                      }
              }
      
      out:
               release_sock(sk);

               return copied ? : err;
      }
      
      static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
                                     struct pipe_inode_info *pipe, size_t len,
                                     unsigned int flags)
      {
              struct sock *sk = sock->sk;
              struct kcm_sock *kcm = kcm_sk(sk);
              long timeo;
              struct strp_msg *stm;
              int err = 0;
              ssize_t copied;
              struct sk_buff *skb;
      
               /* Only support splice for SOCK_SEQPACKET */
      
              timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
      
              lock_sock(sk);
      
              skb = kcm_wait_data(sk, flags, timeo, &err);
              if (!skb)
                      goto err_out;
      
              /* Okay, have a message on the receive queue */
      
              stm = strp_msg(skb);
      
              if (len > stm->full_len)
                      len = stm->full_len;
      
              copied = skb_splice_bits(skb, sk, stm->offset, pipe, len, flags);
              if (copied < 0) {
                      err = copied;
                      goto err_out;
              }
      
              KCM_STATS_ADD(kcm->stats.rx_bytes, copied);
      
              stm->offset += copied;
              stm->full_len -= copied;
      
              /* We have no way to return MSG_EOR. If all the bytes have been
               * read we still leave the message in the receive socket buffer.
               * A subsequent recvmsg needs to be done to return MSG_EOR and
               * finish reading the message.
               */
      
              release_sock(sk);
      
              return copied;
      
      err_out:
              release_sock(sk);
      
              return err;
      }
      
      /* kcm sock lock held */
      static void kcm_recv_disable(struct kcm_sock *kcm)
      {
              struct kcm_mux *mux = kcm->mux;
      
              if (kcm->rx_disabled)
                      return;
      
              spin_lock_bh(&mux->rx_lock);
      
              kcm->rx_disabled = 1;
      
              /* If a psock is reserved we'll do cleanup in unreserve */
              if (!kcm->rx_psock) {
                      if (kcm->rx_wait) {
                              list_del(&kcm->wait_rx_list);
                              kcm->rx_wait = false;
                      }
      
                      requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue);
              }
      
              spin_unlock_bh(&mux->rx_lock);
      }
      
      /* kcm sock lock held */
      static void kcm_recv_enable(struct kcm_sock *kcm)
      {
              struct kcm_mux *mux = kcm->mux;
      
              if (!kcm->rx_disabled)
                      return;
      
              spin_lock_bh(&mux->rx_lock);
      
              kcm->rx_disabled = 0;
              kcm_rcv_ready(kcm);
      
              spin_unlock_bh(&mux->rx_lock);
      }
      
      static int kcm_setsockopt(struct socket *sock, int level, int optname,
                                char __user *optval, unsigned int optlen)
      {
               struct kcm_sock *kcm = kcm_sk(sock->sk);
              int val, valbool;
              int err = 0;
      
              if (level != SOL_KCM)
                      return -ENOPROTOOPT;
      
               if (optlen < sizeof(int))
                       return -EINVAL;

               if (get_user(val, (int __user *)optval))
                      return -EINVAL;
      
              valbool = val ? 1 : 0;
      
              switch (optname) {
              case KCM_RECV_DISABLE:
                      lock_sock(&kcm->sk);
                      if (valbool)
                              kcm_recv_disable(kcm);
                      else
                              kcm_recv_enable(kcm);
                      release_sock(&kcm->sk);
                       break;
              default:
                      err = -ENOPROTOOPT;
              }
      
              return err;
      }
      
      static int kcm_getsockopt(struct socket *sock, int level, int optname,
                                char __user *optval, int __user *optlen)
      {
              struct kcm_sock *kcm = kcm_sk(sock->sk);
              int val, len;
      
              if (level != SOL_KCM)
                      return -ENOPROTOOPT;
      
              if (get_user(len, optlen))
                      return -EFAULT;
      
              len = min_t(unsigned int, len, sizeof(int));
              if (len < 0)
                      return -EINVAL;
      
              switch (optname) {
              case KCM_RECV_DISABLE:
                      val = kcm->rx_disabled;
                      break;
              default:
                      return -ENOPROTOOPT;
              }
      
              if (put_user(len, optlen))
                      return -EFAULT;
              if (copy_to_user(optval, &val, len))
                      return -EFAULT;
              return 0;
      }
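
       /* Userspace usage sketch (not part of this file): toggling receive on a
        * KCM socket via the KCM_RECV_DISABLE option handled above, assuming
        * "kcmfd" is a KCM socket descriptor:
        *
        *	int on = 1, cur;
        *	socklen_t len = sizeof(cur);
        *
        *	setsockopt(kcmfd, SOL_KCM, KCM_RECV_DISABLE, &on, sizeof(on));
        *	getsockopt(kcmfd, SOL_KCM, KCM_RECV_DISABLE, &cur, &len);
        */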
      
      static void init_kcm_sock(struct kcm_sock *kcm, struct kcm_mux *mux)
      {
              struct kcm_sock *tkcm;
              struct list_head *head;
              int index = 0;
      
              /* For SOCK_SEQPACKET sock type, datagram_poll checks the sk_state, so
               * we set sk_state, otherwise epoll_wait always returns right away with
               * EPOLLHUP
               */
   84         kcm->sk.sk_state = TCP_ESTABLISHED;
      
              /* Add to mux's kcm sockets list */
              kcm->mux = mux;
              spin_lock_bh(&mux->lock);
      
              head = &mux->kcm_socks;
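               /* kcm_socks is kept sorted by index; walk it to find the lowest
                * unused index and the list position to insert at.
                */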
              list_for_each_entry(tkcm, &mux->kcm_socks, kcm_sock_list) {
   15                 if (tkcm->index != index)
                              break;
                      head = &tkcm->kcm_sock_list;
   14                 index++;
              }
      
   84         list_add(&kcm->kcm_sock_list, head);
   84         kcm->index = index;
      
              mux->kcm_socks_cnt++;
              spin_unlock_bh(&mux->lock);
      
              INIT_WORK(&kcm->tx_work, kcm_tx_work);
      
              spin_lock_bh(&mux->rx_lock);
              kcm_rcv_ready(kcm);
              spin_unlock_bh(&mux->rx_lock);
   14 }
      
      static int kcm_attach(struct socket *sock, struct socket *csock,
                            struct bpf_prog *prog)
      {
              struct kcm_sock *kcm = kcm_sk(sock->sk);
              struct kcm_mux *mux = kcm->mux;
              struct sock *csk;
              struct kcm_psock *psock = NULL, *tpsock;
              struct list_head *head;
              int index = 0;
              static const struct strp_callbacks cb = {
                      .rcv_msg = kcm_rcv_strparser,
                      .parse_msg = kcm_parse_func_strparser,
                      .read_sock_done = kcm_read_sock_done,
              };
              int err = 0;
      
              csk = csock->sk;
              if (!csk)
                      return -EINVAL;
      
   10         lock_sock(csk);
      
              /* Only allow TCP sockets to be attached for now */
              if ((csk->sk_family != AF_INET && csk->sk_family != AF_INET6) ||
    9             csk->sk_protocol != IPPROTO_TCP) {
                      err = -EOPNOTSUPP;
                      goto out;
              }
      
              /* Don't allow listeners or closed sockets */
    8         if (csk->sk_state == TCP_LISTEN || csk->sk_state == TCP_CLOSE) {
                      err = -EOPNOTSUPP;
                      goto out;
              }
      
    5         psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL);
              if (!psock) {
                      err = -ENOMEM;
                      goto out;
              }
      
    5         psock->mux = mux;
              psock->sk = csk;
              psock->bpf_prog = prog;
      
              err = strp_init(&psock->strp, csk, &cb);
              if (err) {
                      kmem_cache_free(kcm_psockp, psock);
                      goto out;
              }
      
    5         write_lock_bh(&csk->sk_callback_lock);
      
               /* Check if sk_user_data is already in use by KCM or someone else.
               * Must be done under lock to prevent race conditions.
               */
              if (csk->sk_user_data) {
                      write_unlock_bh(&csk->sk_callback_lock);
                      strp_stop(&psock->strp);
                      strp_done(&psock->strp);
                      kmem_cache_free(kcm_psockp, psock);
                      err = -EALREADY;
                      goto out;
              }
      
    5         psock->save_data_ready = csk->sk_data_ready;
              psock->save_write_space = csk->sk_write_space;
              psock->save_state_change = csk->sk_state_change;
              csk->sk_user_data = psock;
              csk->sk_data_ready = psock_data_ready;
              csk->sk_write_space = psock_write_space;
              csk->sk_state_change = psock_state_change;
      
              write_unlock_bh(&csk->sk_callback_lock);
      
    5         sock_hold(csk);
      
              /* Finished initialization, now add the psock to the MUX. */
    5         spin_lock_bh(&mux->lock);
              head = &mux->psocks;
              list_for_each_entry(tpsock, &mux->psocks, psock_list) {
    1                 if (tpsock->index != index)
                              break;
                      head = &tpsock->psock_list;
    1                 index++;
              }
      
    5         list_add(&psock->psock_list, head);
    5         psock->index = index;
      
              KCM_STATS_INCR(mux->stats.psock_attach);
              mux->psocks_cnt++;
              psock_now_avail(psock);
              spin_unlock_bh(&mux->lock);
      
              /* Schedule RX work in case there are already bytes queued */
              strp_check_rcv(&psock->strp);
      
      out:
    6         release_sock(csk);
      
              return err;
      }
      
      static int kcm_attach_ioctl(struct socket *sock, struct kcm_attach *info)
      {
              struct socket *csock;
              struct bpf_prog *prog;
              int err;
      
              csock = sockfd_lookup(info->fd, &err);
              if (!csock)
                      return -ENOENT;
      
   12         prog = bpf_prog_get_type(info->bpf_fd, BPF_PROG_TYPE_SOCKET_FILTER);
              if (IS_ERR(prog)) {
    2                 err = PTR_ERR(prog);
                      goto out;
              }
      
   10         err = kcm_attach(sock, csock, prog);
              if (err) {
    6                 bpf_prog_put(prog);
                      goto out;
              }
      
              /* Keep reference on file also */
      
              return 0;
      out:
    8         fput(csock->file);
              return err;
      }
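
       /* Userspace usage sketch (not part of this file, needs <linux/kcm.h>):
        * attaching a connected TCP socket "tcpfd" to the KCM socket "kcmfd",
        * using an already-loaded BPF_PROG_TYPE_SOCKET_FILTER program "bpf_fd"
        * as the message parser:
        *
        *	struct kcm_attach attach = {
        *		.fd = tcpfd,
        *		.bpf_fd = bpf_fd,
        *	};
        *
        *	if (ioctl(kcmfd, SIOCKCMATTACH, &attach))
        *		perror("SIOCKCMATTACH");
        */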
      
      static void kcm_unattach(struct kcm_psock *psock)
      {
    5         struct sock *csk = psock->sk;
              struct kcm_mux *mux = psock->mux;
      
              lock_sock(csk);
      
              /* Stop getting callbacks from TCP socket. After this there should
               * be no way to reserve a kcm for this psock.
               */
              write_lock_bh(&csk->sk_callback_lock);
              csk->sk_user_data = NULL;
              csk->sk_data_ready = psock->save_data_ready;
              csk->sk_write_space = psock->save_write_space;
              csk->sk_state_change = psock->save_state_change;
              strp_stop(&psock->strp);
      
              if (WARN_ON(psock->rx_kcm)) {
                      write_unlock_bh(&csk->sk_callback_lock);
                      release_sock(csk);
                      return;
              }
      
    5         spin_lock_bh(&mux->rx_lock);
      
              /* Stop receiver activities. After this point psock should not be
               * able to get onto ready list either through callbacks or work.
               */
              if (psock->ready_rx_msg) {
                      list_del(&psock->psock_ready_list);
                      kfree_skb(psock->ready_rx_msg);
                      psock->ready_rx_msg = NULL;
                      KCM_STATS_INCR(mux->stats.rx_ready_drops);
              }
      
    5         spin_unlock_bh(&mux->rx_lock);
      
              write_unlock_bh(&csk->sk_callback_lock);
      
              /* Call strp_done without sock lock */
              release_sock(csk);
              strp_done(&psock->strp);
              lock_sock(csk);
      
              bpf_prog_put(psock->bpf_prog);
      
              spin_lock_bh(&mux->lock);
      
              aggregate_psock_stats(&psock->stats, &mux->aggregate_psock_stats);
    4         save_strp_stats(&psock->strp, &mux->aggregate_strp_stats);
      
    4         KCM_STATS_INCR(mux->stats.psock_unattach);
      
              if (psock->tx_kcm) {
                       /* psock was reserved. Just mark it finished and we will
                        * clean it up in the kcm paths; we need the kcm lock,
                        * which cannot be acquired here.
                       */
    2                 KCM_STATS_INCR(mux->stats.psock_unattach_rsvd);
                      spin_unlock_bh(&mux->lock);
      
                      /* We are unattaching a socket that is reserved. Abort the
                       * socket since we may be out of sync in sending on it. We need
                       * to do this without the mux lock.
                       */
                      kcm_abort_tx_psock(psock, EPIPE, false);
      
                      spin_lock_bh(&mux->lock);
                      if (!psock->tx_kcm) {
                               /* psock was unreserved in the window while the mux was unlocked */
                              goto no_reserved;
                      }
    2                 psock->done = 1;
      
                      /* Commit done before queuing work to process it */
                      smp_mb();
      
                      /* Queue tx work to make sure psock->done is handled */
                      queue_work(kcm_wq, &psock->tx_kcm->tx_work);
                      spin_unlock_bh(&mux->lock);
              } else {
      no_reserved:
    2                 if (!psock->tx_stopped)
    1                         list_del(&psock->psock_avail_list);
    2                 list_del(&psock->psock_list);
                      mux->psocks_cnt--;
                      spin_unlock_bh(&mux->lock);
      
    2                 sock_put(csk);
    2                 fput(csk->sk_socket->file);
                      kmem_cache_free(kcm_psockp, psock);
              }
      
    4         release_sock(csk);
      }
      
      static int kcm_unattach_ioctl(struct socket *sock, struct kcm_unattach *info)
      {
              struct kcm_sock *kcm = kcm_sk(sock->sk);
              struct kcm_mux *mux = kcm->mux;
              struct kcm_psock *psock;
              struct socket *csock;
              struct sock *csk;
              int err;
      
              csock = sockfd_lookup(info->fd, &err);
              if (!csock)
                      return -ENOENT;
      
   12         csk = csock->sk;
              if (!csk) {
                      err = -EINVAL;
                      goto out;
              }
      
   12         err = -ENOENT;
      
              spin_lock_bh(&mux->lock);
      
    1         list_for_each_entry(psock, &mux->psocks, psock_list) {
    4                 if (psock->sk != csk)
                              continue;
      
                      /* Found the matching psock */
      
    3                 if (psock->unattaching || WARN_ON(psock->done)) {
                              err = -EALREADY;
                              break;
                      }
      
    3                 psock->unattaching = 1;
      
                      spin_unlock_bh(&mux->lock);
      
                      /* Lower socket lock should already be held */
                      kcm_unattach(psock);
      
                      err = 0;
                      goto out;
              }
      
    9         spin_unlock_bh(&mux->lock);
      
      out:
   12         fput(csock->file);
              return err;
      }
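
       /* Userspace usage sketch (not part of this file): detaching the TCP
        * socket "tcpfd" attached above from the KCM socket "kcmfd":
        *
        *	struct kcm_unattach unattach = { .fd = tcpfd };
        *
        *	if (ioctl(kcmfd, SIOCKCMUNATTACH, &unattach))
        *		perror("SIOCKCMUNATTACH");
        */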
      
      static struct proto kcm_proto = {
              .name        = "KCM",
              .owner        = THIS_MODULE,
              .obj_size = sizeof(struct kcm_sock),
      };
      
      /* Clone a kcm socket. */
      static struct file *kcm_clone(struct socket *osock)
      {
              struct socket *newsock;
              struct sock *newsk;
      
   15         newsock = sock_alloc();
              if (!newsock)
                      return ERR_PTR(-ENFILE);
      
   15         newsock->type = osock->type;
              newsock->ops = osock->ops;
      
              __module_get(newsock->ops->owner);
      
              newsk = sk_alloc(sock_net(osock->sk), PF_KCM, GFP_KERNEL,
                               &kcm_proto, false);
              if (!newsk) {
                      sock_release(newsock);
                      return ERR_PTR(-ENOMEM);
              }
   15         sock_init_data(newsock, newsk);
              init_kcm_sock(kcm_sk(newsk), kcm_sk(osock->sk)->mux);
      
              return sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name);
      }
      
   35 static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
      {
              int err;
      
  209         switch (cmd) {
              case SIOCKCMATTACH: {
                      struct kcm_attach info;
      
   13                 if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
                              return -EFAULT;
      
   12                 err = kcm_attach_ioctl(sock, &info);
      
                      break;
              }
              case SIOCKCMUNATTACH: {
                      struct kcm_unattach info;
      
   13                 if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
    1                         return -EFAULT;
      
   13                 err = kcm_unattach_ioctl(sock, &info);
      
                      break;
              }
              case SIOCKCMCLONE: {
                      struct kcm_clone info;
                      struct file *file;
      
   15                 info.fd = get_unused_fd_flags(0);
                      if (unlikely(info.fd < 0))
   27                         return info.fd;
      
   15                 file = kcm_clone(sock);
                      if (IS_ERR(file)) {
                              put_unused_fd(info.fd);
                              return PTR_ERR(file);
                      }
   15                 if (copy_to_user((void __user *)arg, &info,
                                       sizeof(info))) {
    5                         put_unused_fd(info.fd);
                              fput(file);
                              return -EFAULT;
                      }
   10                 fd_install(info.fd, file);
                      err = 0;
                      break;
              }
              default:
                      err = -ENOIOCTLCMD;
                      break;
              }
      
              return err;
      }
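
       /* Userspace usage sketch (not part of this file): cloning a KCM socket so
        * another thread can operate on the same multiplexor; the kernel returns
        * the new descriptor in info.fd. use_clone() is a hypothetical helper.
        *
        *	struct kcm_clone info;
        *
        *	if (ioctl(kcmfd, SIOCKCMCLONE, &info) == 0)
        *		use_clone(info.fd);	// hypothetical consumer of the new fd
        */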
      
      static void free_mux(struct rcu_head *rcu)
      {
              struct kcm_mux *mux = container_of(rcu,
                  struct kcm_mux, rcu);
      
              kmem_cache_free(kcm_muxp, mux);
      }
      
      static void release_mux(struct kcm_mux *mux)
      {
   15         struct kcm_net *knet = mux->knet;
              struct kcm_psock *psock, *tmp_psock;
      
              /* Release psocks */
    1         list_for_each_entry_safe(psock, tmp_psock,
                                       &mux->psocks, psock_list) {
    2                 if (!WARN_ON(psock->unattaching))
    2                         kcm_unattach(psock);
              }
      
   15         if (WARN_ON(mux->psocks_cnt))
                      return;
      
   15         __skb_queue_purge(&mux->rx_hold_queue);
      
   15         mutex_lock(&knet->mutex);
              aggregate_mux_stats(&mux->stats, &knet->aggregate_mux_stats);
              aggregate_psock_stats(&mux->aggregate_psock_stats,
                                    &knet->aggregate_psock_stats);
              aggregate_strp_stats(&mux->aggregate_strp_stats,
                                   &knet->aggregate_strp_stats);
   15         list_del_rcu(&mux->kcm_mux_list);
              knet->count--;
              mutex_unlock(&knet->mutex);
      
              call_rcu(&mux->rcu, free_mux);
      }
      
      static void kcm_done(struct kcm_sock *kcm)
      {
   20         struct kcm_mux *mux = kcm->mux;
              struct sock *sk = &kcm->sk;
              int socks_cnt;
      
              spin_lock_bh(&mux->rx_lock);
              if (kcm->rx_psock) {
                      /* Cleanup in unreserve_rx_kcm */
                      WARN_ON(kcm->done);
                      kcm->rx_disabled = 1;
                      kcm->done = 1;
                      spin_unlock_bh(&mux->rx_lock);
   18                 return;
              }
      
   20         if (kcm->rx_wait) {
   20                 list_del(&kcm->wait_rx_list);
                      kcm->rx_wait = false;
              }
              /* Move any pending receive messages to other kcm sockets */
   20         requeue_rx_msgs(mux, &sk->sk_receive_queue);
      
              spin_unlock_bh(&mux->rx_lock);
      
              if (WARN_ON(sk_rmem_alloc_get(sk)))
                      return;
      
              /* Detach from MUX */
   20         spin_lock_bh(&mux->lock);
      
   20         list_del(&kcm->kcm_sock_list);
              mux->kcm_socks_cnt--;
              socks_cnt = mux->kcm_socks_cnt;
      
              spin_unlock_bh(&mux->lock);
      
              if (!socks_cnt) {
                      /* We are done with the mux now. */
   15                 release_mux(mux);
              }
      
   20         WARN_ON(kcm->rx_wait);
      
   20         sock_put(&kcm->sk);
      }
      
      /* Called by kcm_release to close a KCM socket.
       * If this is the last KCM socket on the MUX, destroy the MUX.
       */
      static int kcm_release(struct socket *sock)
      {
   20         struct sock *sk = sock->sk;
              struct kcm_sock *kcm;
              struct kcm_mux *mux;
              struct kcm_psock *psock;
      
              if (!sk)
                      return 0;
      
              kcm = kcm_sk(sk);
   20         mux = kcm->mux;
      
              sock_orphan(sk);
              kfree_skb(kcm->seq_skb);
      
              lock_sock(sk);
              /* Purge queue under lock to avoid race condition with tx_work trying
               * to act when queue is nonempty. If tx_work runs after this point
               * it will just return.
               */
    5         __skb_queue_purge(&sk->sk_write_queue);
      
              /* Set tx_stopped. This is checked when psock is bound to a kcm and we
               * get a writespace callback. This prevents further work being queued
                * from the callback (unbinding the psock occurs after canceling work).
               */
   20         kcm->tx_stopped = 1;
      
              release_sock(sk);
      
              spin_lock_bh(&mux->lock);
              if (kcm->tx_wait) {
                       /* Take off the tx_wait list; after this point there should be no way
                       * that a psock will be assigned to this kcm.
                       */
    4                 list_del(&kcm->wait_psock_list);
                      kcm->tx_wait = false;
              }
   20         spin_unlock_bh(&mux->lock);
      
              /* Cancel work. After this point there should be no outside references
               * to the kcm socket.
               */
              cancel_work_sync(&kcm->tx_work);
      
              lock_sock(sk);
              psock = kcm->tx_psock;
              if (psock) {
                      /* A psock was reserved, so we need to kill it since it
                       * may already have some bytes queued from a message. We
                       * need to do this after removing kcm from tx_wait list.
                       */
    2                 kcm_abort_tx_psock(psock, EPIPE, false);
                      unreserve_psock(kcm);
              }
   20         release_sock(sk);
      
              WARN_ON(kcm->tx_wait);
   20         WARN_ON(kcm->tx_psock);
      
   20         sock->sk = NULL;
      
              kcm_done(kcm);
      
   18         return 0;
      }
      
      static const struct proto_ops kcm_dgram_ops = {
              .family =        PF_KCM,
              .owner =        THIS_MODULE,
              .release =        kcm_release,
              .bind =                sock_no_bind,
              .connect =        sock_no_connect,
              .socketpair =        sock_no_socketpair,
              .accept =        sock_no_accept,
              .getname =        sock_no_getname,
              .poll =                datagram_poll,
              .ioctl =        kcm_ioctl,
              .listen =        sock_no_listen,
              .shutdown =        sock_no_shutdown,
              .setsockopt =        kcm_setsockopt,
              .getsockopt =        kcm_getsockopt,
              .sendmsg =        kcm_sendmsg,
              .recvmsg =        kcm_recvmsg,
              .mmap =                sock_no_mmap,
              .sendpage =        kcm_sendpage,
      };
      
      static const struct proto_ops kcm_seqpacket_ops = {
              .family =        PF_KCM,
              .owner =        THIS_MODULE,
              .release =        kcm_release,
              .bind =                sock_no_bind,
              .connect =        sock_no_connect,
              .socketpair =        sock_no_socketpair,
              .accept =        sock_no_accept,
              .getname =        sock_no_getname,
              .poll =                datagram_poll,
              .ioctl =        kcm_ioctl,
              .listen =        sock_no_listen,
              .shutdown =        sock_no_shutdown,
              .setsockopt =        kcm_setsockopt,
              .getsockopt =        kcm_getsockopt,
              .sendmsg =        kcm_sendmsg,
              .recvmsg =        kcm_recvmsg,
              .mmap =                sock_no_mmap,
              .sendpage =        kcm_sendpage,
              .splice_read =        kcm_splice_read,
      };
      
      /* Create proto operation for kcm sockets */
      static int kcm_create(struct net *net, struct socket *sock,
                            int protocol, int kern)
      {
   97         struct kcm_net *knet = net_generic(net, kcm_net_id);
              struct sock *sk;
              struct kcm_mux *mux;
      
   94         switch (sock->type) {
              case SOCK_DGRAM:
   40                 sock->ops = &kcm_dgram_ops;
                      break;
              case SOCK_SEQPACKET:
   35                 sock->ops = &kcm_seqpacket_ops;
                      break;
              default:
                      return -ESOCKTNOSUPPORT;
              }
      
   74         if (protocol != KCMPROTO_CONNECTED)
                      return -EPROTONOSUPPORT;
      
   73         sk = sk_alloc(net, PF_KCM, GFP_KERNEL, &kcm_proto, kern);
              if (!sk)
                      return -ENOMEM;
      
              /* Allocate a kcm mux, shared between KCM sockets */
   73         mux = kmem_cache_zalloc(kcm_muxp, GFP_KERNEL);
              if (!mux) {
                      sk_free(sk);
                      return -ENOMEM;
              }
      
   71         spin_lock_init(&mux->lock);
              spin_lock_init(&mux->rx_lock);
              INIT_LIST_HEAD(&mux->kcm_socks);
              INIT_LIST_HEAD(&mux->kcm_rx_waiters);
              INIT_LIST_HEAD(&mux->kcm_tx_waiters);
      
              INIT_LIST_HEAD(&mux->psocks);
              INIT_LIST_HEAD(&mux->psocks_ready);
              INIT_LIST_HEAD(&mux->psocks_avail);
      
              mux->knet = knet;
      
              /* Add new MUX to list */
              mutex_lock(&knet->mutex);
   71         list_add_rcu(&mux->kcm_mux_list, &knet->mux_list);
   71         knet->count++;
              mutex_unlock(&knet->mutex);
      
              skb_queue_head_init(&mux->rx_hold_queue);
      
              /* Init KCM socket */
              sock_init_data(sock, sk);
              init_kcm_sock(kcm_sk(sk), mux);
      
              return 0;
      }
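
       /* Userspace usage sketch (not part of this file): creating a KCM socket,
        * which instantiates a new multiplexor as in kcm_create() above:
        *
        *	int kcmfd = socket(AF_KCM, SOCK_SEQPACKET, KCMPROTO_CONNECTED);
        *
        *	if (kcmfd < 0)
        *		perror("socket AF_KCM");
        */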
      
      static const struct net_proto_family kcm_family_ops = {
              .family = PF_KCM,
              .create = kcm_create,
              .owner  = THIS_MODULE,
      };
      
      static __net_init int kcm_init_net(struct net *net)
      {
              struct kcm_net *knet = net_generic(net, kcm_net_id);
      
              INIT_LIST_HEAD_RCU(&knet->mux_list);
              mutex_init(&knet->mutex);
      
              return 0;
      }
      
      static __net_exit void kcm_exit_net(struct net *net)
      {
              struct kcm_net *knet = net_generic(net, kcm_net_id);
      
              /* All KCM sockets should be closed at this point, which should mean
               * that all multiplexors and psocks have been destroyed.
               */
              WARN_ON(!list_empty(&knet->mux_list));
      }
      
      static struct pernet_operations kcm_net_ops = {
              .init = kcm_init_net,
              .exit = kcm_exit_net,
              .id   = &kcm_net_id,
              .size = sizeof(struct kcm_net),
      };
      
      static int __init kcm_init(void)
      {
              int err = -ENOMEM;
      
              kcm_muxp = kmem_cache_create("kcm_mux_cache",
                                           sizeof(struct kcm_mux), 0,
                                           SLAB_HWCACHE_ALIGN, NULL);
              if (!kcm_muxp)
                      goto fail;
      
              kcm_psockp = kmem_cache_create("kcm_psock_cache",
                                             sizeof(struct kcm_psock), 0,
                                              SLAB_HWCACHE_ALIGN, NULL);
              if (!kcm_psockp)
                      goto fail;
      
              kcm_wq = create_singlethread_workqueue("kkcmd");
              if (!kcm_wq)
                      goto fail;
      
              err = proto_register(&kcm_proto, 1);
              if (err)
                      goto fail;
      
              err = register_pernet_device(&kcm_net_ops);
              if (err)
                      goto net_ops_fail;
      
              err = sock_register(&kcm_family_ops);
              if (err)
                      goto sock_register_fail;
      
              err = kcm_proc_init();
              if (err)
                      goto proc_init_fail;
      
              return 0;
      
      proc_init_fail:
              sock_unregister(PF_KCM);
      
      sock_register_fail:
              unregister_pernet_device(&kcm_net_ops);
      
      net_ops_fail:
              proto_unregister(&kcm_proto);
      
      fail:
              kmem_cache_destroy(kcm_muxp);
              kmem_cache_destroy(kcm_psockp);
      
              if (kcm_wq)
                      destroy_workqueue(kcm_wq);
      
              return err;
      }
      
      static void __exit kcm_exit(void)
      {
              kcm_proc_exit();
              sock_unregister(PF_KCM);
              unregister_pernet_device(&kcm_net_ops);
              proto_unregister(&kcm_proto);
              destroy_workqueue(kcm_wq);
      
              kmem_cache_destroy(kcm_muxp);
              kmem_cache_destroy(kcm_psockp);
      }
      
      module_init(kcm_init);
      module_exit(kcm_exit);
      
      MODULE_LICENSE("GPL");
      MODULE_ALIAS_NETPROTO(PF_KCM);
      /* Copyright 2011, Siemens AG
       * written by Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
       */
      
      /* Based on patches from Jon Smirl <jonsmirl@gmail.com>
       * Copyright (c) 2011 Jon Smirl <jonsmirl@gmail.com>
       *
       * This program is free software; you can redistribute it and/or modify
       * it under the terms of the GNU General Public License version 2
       * as published by the Free Software Foundation.
       *
       * This program is distributed in the hope that it will be useful,
       * but WITHOUT ANY WARRANTY; without even the implied warranty of
       * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
       * GNU General Public License for more details.
       */
      
      /* Jon's code is based on 6lowpan implementation for Contiki which is:
       * Copyright (c) 2008, Swedish Institute of Computer Science.
       * All rights reserved.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       * 3. Neither the name of the Institute nor the names of its contributors
       *    may be used to endorse or promote products derived from this software
       *    without specific prior written permission.
       *
       * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
       * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       * ARE DISCLAIMED.  IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
       * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
       * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
       * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
       * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
       * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
       * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
       * SUCH DAMAGE.
       */
      
      #include <linux/module.h>
      #include <linux/netdevice.h>
      #include <linux/ieee802154.h>
      
      #include <net/ipv6.h>
      
      #include "6lowpan_i.h"
      
      static int open_count;
      
      static const struct header_ops lowpan_header_ops = {
              .create        = lowpan_header_create,
      };
      
      static int lowpan_open(struct net_device *dev)
      {
              if (!open_count)
                      lowpan_rx_init();
              open_count++;
              return 0;
      }
      
      static int lowpan_stop(struct net_device *dev)
      {
              open_count--;
              if (!open_count)
                      lowpan_rx_exit();
              return 0;
      }
      
      static int lowpan_neigh_construct(struct net_device *dev, struct neighbour *n)
      {
              struct lowpan_802154_neigh *neigh = lowpan_802154_neigh(neighbour_priv(n));
      
               /* By default no short_addr is available for a neighbour */
              neigh->short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC);
              return 0;
      }
      
      static int lowpan_get_iflink(const struct net_device *dev)
      {
              return lowpan_802154_dev(dev)->wdev->ifindex;
      }
      
      static const struct net_device_ops lowpan_netdev_ops = {
              .ndo_start_xmit                = lowpan_xmit,
              .ndo_open                = lowpan_open,
              .ndo_stop                = lowpan_stop,
              .ndo_neigh_construct    = lowpan_neigh_construct,
              .ndo_get_iflink         = lowpan_get_iflink,
      };
      
      static void lowpan_setup(struct net_device *ldev)
      {
              memset(ldev->broadcast, 0xff, IEEE802154_ADDR_LEN);
              /* We need an ipv6hdr as minimum len when calling xmit */
              ldev->hard_header_len        = sizeof(struct ipv6hdr);
              ldev->flags                = IFF_BROADCAST | IFF_MULTICAST;
              ldev->priv_flags        |= IFF_NO_QUEUE;
      
              ldev->netdev_ops        = &lowpan_netdev_ops;
              ldev->header_ops        = &lowpan_header_ops;
              ldev->needs_free_netdev        = true;
              ldev->features                |= NETIF_F_NETNS_LOCAL;
      }
      
      static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[],
                                 struct netlink_ext_ack *extack)
      {
              if (tb[IFLA_ADDRESS]) {
                      if (nla_len(tb[IFLA_ADDRESS]) != IEEE802154_ADDR_LEN)
                              return -EINVAL;
              }
              return 0;
      }
      
      static int lowpan_newlink(struct net *src_net, struct net_device *ldev,
                                struct nlattr *tb[], struct nlattr *data[],
                                struct netlink_ext_ack *extack)
      {
              struct net_device *wdev;
              int ret;
      
              ASSERT_RTNL();
      
              pr_debug("adding new link\n");
      
              if (!tb[IFLA_LINK])
                      return -EINVAL;
              /* find and hold wpan device */
              wdev = dev_get_by_index(dev_net(ldev), nla_get_u32(tb[IFLA_LINK]));
              if (!wdev)
                      return -ENODEV;
              if (wdev->type != ARPHRD_IEEE802154) {
                      dev_put(wdev);
                      return -EINVAL;
              }
      
              if (wdev->ieee802154_ptr->lowpan_dev) {
                      dev_put(wdev);
                      return -EBUSY;
              }
      
              lowpan_802154_dev(ldev)->wdev = wdev;
              /* Set the lowpan hardware address to the wpan hardware address. */
              memcpy(ldev->dev_addr, wdev->dev_addr, IEEE802154_ADDR_LEN);
              /* We need headroom for possible wpan_dev_hard_header call and tailroom
               * for encryption/fcs handling. The lowpan interface will replace
                * the IPv6 header with a 6LoWPAN header. In the worst case the 6LoWPAN
               * header has LOWPAN_IPHC_MAX_HEADER_LEN more bytes than the IPv6
               * header.
               */
              ldev->needed_headroom = LOWPAN_IPHC_MAX_HEADER_LEN +
                                      wdev->needed_headroom;
              ldev->needed_tailroom = wdev->needed_tailroom;
      
              ldev->neigh_priv_len = sizeof(struct lowpan_802154_neigh);
      
              ret = lowpan_register_netdevice(ldev, LOWPAN_LLTYPE_IEEE802154);
              if (ret < 0) {
                      dev_put(wdev);
                      return ret;
              }
      
              wdev->ieee802154_ptr->lowpan_dev = ldev;
              return 0;
      }
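
       /* The newlink path above is normally driven from userspace with iproute2,
        * typically something like (interface names are examples):
        *
        *	ip link add link wpan0 name lowpan0 type lowpan
        *	ip link set lowpan0 up
        */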
      
      static void lowpan_dellink(struct net_device *ldev, struct list_head *head)
      {
              struct net_device *wdev = lowpan_802154_dev(ldev)->wdev;
      
              ASSERT_RTNL();
      
              wdev->ieee802154_ptr->lowpan_dev = NULL;
              lowpan_unregister_netdevice(ldev);
              dev_put(wdev);
      }
      
      static struct rtnl_link_ops lowpan_link_ops __read_mostly = {
              .kind                = "lowpan",
              .priv_size        = LOWPAN_PRIV_SIZE(sizeof(struct lowpan_802154_dev)),
              .setup                = lowpan_setup,
              .newlink        = lowpan_newlink,
              .dellink        = lowpan_dellink,
              .validate        = lowpan_validate,
      };
      
      static inline int __init lowpan_netlink_init(void)
      {
              return rtnl_link_register(&lowpan_link_ops);
      }
      
      static inline void lowpan_netlink_fini(void)
      {
              rtnl_link_unregister(&lowpan_link_ops);
      }
      
      static int lowpan_device_event(struct notifier_block *unused,
                                     unsigned long event, void *ptr)
      {
 1198         struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
              struct wpan_dev *wpan_dev;
      
              if (ndev->type != ARPHRD_IEEE802154)
 1198                 return NOTIFY_DONE;
    2         wpan_dev = ndev->ieee802154_ptr;
              if (!wpan_dev)
                      return NOTIFY_DONE;
      
              switch (event) {
              case NETDEV_UNREGISTER:
                       /* If the wpan interface is being unregistered, also delete
                        * any lowpan interfaces that belong to it.
                       */
                      if (wpan_dev->lowpan_dev)
                              lowpan_dellink(wpan_dev->lowpan_dev, NULL);
                      break;
              default:
                      return NOTIFY_DONE;
              }
      
              return NOTIFY_OK;
      }
      
      static struct notifier_block lowpan_dev_notifier = {
              .notifier_call = lowpan_device_event,
      };
      
      static int __init lowpan_init_module(void)
      {
              int err = 0;
      
              err = lowpan_net_frag_init();
              if (err < 0)
                      goto out;
      
              err = lowpan_netlink_init();
              if (err < 0)
                      goto out_frag;
      
              err = register_netdevice_notifier(&lowpan_dev_notifier);
              if (err < 0)
                      goto out_pack;
      
              return 0;
      
      out_pack:
              lowpan_netlink_fini();
      out_frag:
              lowpan_net_frag_exit();
      out:
              return err;
      }
      
      static void __exit lowpan_cleanup_module(void)
      {
              lowpan_netlink_fini();
      
              lowpan_net_frag_exit();
      
              unregister_netdevice_notifier(&lowpan_dev_notifier);
      }
      
      module_init(lowpan_init_module);
      module_exit(lowpan_cleanup_module);
      MODULE_LICENSE("GPL");
      MODULE_ALIAS_RTNL_LINK("lowpan");
      // SPDX-License-Identifier: GPL-2.0-or-later
      /*
       * INET                An implementation of the TCP/IP protocol suite for the LINUX
       *                operating system.  INET is implemented using the BSD Socket
       *                interface as the means of communication with the user level.
       *
       *                Generic INET transport hashtables
       *
       * Authors:        Lotsa people, from code originally in tcp
       */
      
      #include <linux/module.h>
      #include <linux/random.h>
      #include <linux/sched.h>
      #include <linux/slab.h>
      #include <linux/wait.h>
      #include <linux/vmalloc.h>
      #include <linux/memblock.h>
      
      #include <net/addrconf.h>
      #include <net/inet_connection_sock.h>
      #include <net/inet_hashtables.h>
      #include <net/secure_seq.h>
      #include <net/ip.h>
      #include <net/tcp.h>
      #include <net/sock_reuseport.h>
      
      static u32 inet_ehashfn(const struct net *net, const __be32 laddr,
                              const __u16 lport, const __be32 faddr,
                              const __be16 fport)
      {
              static u32 inet_ehash_secret __read_mostly;
      
   59         net_get_random_once(&inet_ehash_secret, sizeof(inet_ehash_secret));
      
              return __inet_ehashfn(laddr, lport, faddr, fport,
   59                               inet_ehash_secret + net_hash_mix(net));
      }
      
      /* This function handles inet_sock, but also timewait and request sockets
       * for IPv4/IPv6.
       */
      static u32 sk_ehashfn(const struct sock *sk)
      {
      #if IS_ENABLED(CONFIG_IPV6)
  173         if (sk->sk_family == AF_INET6 &&
  129             !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
  120                 return inet6_ehashfn(sock_net(sk),
                                           &sk->sk_v6_rcv_saddr, sk->sk_num,
                                           &sk->sk_v6_daddr, sk->sk_dport);
      #endif
   57         return inet_ehashfn(sock_net(sk),
                                  sk->sk_rcv_saddr, sk->sk_num,
                                  sk->sk_daddr, sk->sk_dport);
      }
      
      /*
       * Allocate and initialize a new local port bind bucket.
       * The bindhash mutex for snum's hash chain must be held here.
       */
      struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
                                                       struct net *net,
                                                       struct inet_bind_hashbucket *head,
                                                       const unsigned short snum,
                                                       int l3mdev)
      {
  201         struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);
      
              if (tb) {
  200                 write_pnet(&tb->ib_net, net);
                      tb->l3mdev    = l3mdev;
                      tb->port      = snum;
                      tb->fastreuse = 0;
                      tb->fastreuseport = 0;
                      INIT_HLIST_HEAD(&tb->owners);
  200                 hlist_add_head(&tb->node, &head->chain);
              }
  200         return tb;
      }
      
      /*
       * Caller must hold hashbucket lock for this tb with local BH disabled
       */
   80 void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket *tb)
      {
              if (hlist_empty(&tb->owners)) {
                      __hlist_del(&tb->node);
   80                 kmem_cache_free(cachep, tb);
              }
      }
      
      void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
                          const unsigned short snum)
      {
   27         inet_sk(sk)->inet_num = snum;
  200         sk_add_bind_node(sk, &tb->owners);
              inet_csk(sk)->icsk_bind_hash = tb;
      }
      
      /*
       * Get rid of any references to a local port held by the given sock.
       */
      static void __inet_put_port(struct sock *sk)
      {
              struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
              const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->inet_num,
                              hashinfo->bhash_size);
              struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
              struct inet_bind_bucket *tb;
      
              spin_lock(&head->lock);
              tb = inet_csk(sk)->icsk_bind_hash;
              __sk_del_bind_node(sk);
   80         inet_csk(sk)->icsk_bind_hash = NULL;
              inet_sk(sk)->inet_num = 0;
   80         inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
   80         spin_unlock(&head->lock);
      }
      
      void inet_put_port(struct sock *sk)
      {
   80         local_bh_disable();
   80         __inet_put_port(sk);
   80         local_bh_enable();
      }
      EXPORT_SYMBOL(inet_put_port);
      
      int __inet_inherit_port(const struct sock *sk, struct sock *child)
      {
              struct inet_hashinfo *table = sk->sk_prot->h.hashinfo;
              unsigned short port = inet_sk(child)->inet_num;
              const int bhash = inet_bhashfn(sock_net(sk), port,
                              table->bhash_size);
              struct inet_bind_hashbucket *head = &table->bhash[bhash];
              struct inet_bind_bucket *tb;
              int l3mdev;
      
              spin_lock(&head->lock);
              tb = inet_csk(sk)->icsk_bind_hash;
              if (unlikely(!tb)) {
                      spin_unlock(&head->lock);
                      return -ENOENT;
              }
              if (tb->port != port) {
                      l3mdev = inet_sk_bound_l3mdev(sk);
      
                      /* NOTE: using tproxy and redirecting skbs to a proxy
                       * on a different listener port breaks the assumption
                       * that the listener socket's icsk_bind_hash is the same
                       * as that of the child socket. We have to look up or
                       * create a new bind bucket for the child here. */
                      inet_bind_bucket_for_each(tb, &head->chain) {
                              if (net_eq(ib_net(tb), sock_net(sk)) &&
                                  tb->l3mdev == l3mdev && tb->port == port)
                                      break;
                      }
                      if (!tb) {
                              tb = inet_bind_bucket_create(table->bind_bucket_cachep,
                                                           sock_net(sk), head, port,
                                                           l3mdev);
                              if (!tb) {
                                      spin_unlock(&head->lock);
                                      return -ENOMEM;
                              }
                      }
              }
              inet_bind_hash(child, tb, port);
              spin_unlock(&head->lock);
      
              return 0;
      }
      EXPORT_SYMBOL_GPL(__inet_inherit_port);
      
      static struct inet_listen_hashbucket *
      inet_lhash2_bucket_sk(struct inet_hashinfo *h, struct sock *sk)
      {
              u32 hash;
      
      #if IS_ENABLED(CONFIG_IPV6)
              if (sk->sk_family == AF_INET6)
                      hash = ipv6_portaddr_hash(sock_net(sk),
                                                &sk->sk_v6_rcv_saddr,
                                                inet_sk(sk)->inet_num);
              else
      #endif
                      hash = ipv4_portaddr_hash(sock_net(sk),
                                                inet_sk(sk)->inet_rcv_saddr,
                                                inet_sk(sk)->inet_num);
              return inet_lhash2_bucket(h, hash);
      }
      
      static void inet_hash2(struct inet_hashinfo *h, struct sock *sk)
      {
              struct inet_listen_hashbucket *ilb2;
      
              if (!h->lhash2)
                      return;
      
              ilb2 = inet_lhash2_bucket_sk(h, sk);
      
              spin_lock(&ilb2->lock);
              if (sk->sk_reuseport && sk->sk_family == AF_INET6)
                      hlist_add_tail_rcu(&inet_csk(sk)->icsk_listen_portaddr_node,
                                         &ilb2->head);
              else
                      hlist_add_head_rcu(&inet_csk(sk)->icsk_listen_portaddr_node,
                                         &ilb2->head);
              ilb2->count++;
              spin_unlock(&ilb2->lock);
      }
      
      static void inet_unhash2(struct inet_hashinfo *h, struct sock *sk)
      {
              struct inet_listen_hashbucket *ilb2;
      
              if (!h->lhash2 ||
                  WARN_ON_ONCE(hlist_unhashed(&inet_csk(sk)->icsk_listen_portaddr_node)))
                      return;
      
              ilb2 = inet_lhash2_bucket_sk(h, sk);
      
              spin_lock(&ilb2->lock);
              hlist_del_init_rcu(&inet_csk(sk)->icsk_listen_portaddr_node);
              ilb2->count--;
              spin_unlock(&ilb2->lock);
      }
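
       /* Scoring used by inet_lhash2_lookup() below: an exact AF_INET listener
        * scores 2, a (non-IPv6-only) AF_INET6 listener scores 1, and a listener
        * whose sk_incoming_cpu matches the current CPU gets one extra point, so
        * the most specific match wins.
        */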
      
      static inline int compute_score(struct sock *sk, struct net *net,
                                      const unsigned short hnum, const __be32 daddr,
                                      const int dif, const int sdif, bool exact_dif)
      {
              int score = -1;
      
    2         if (net_eq(sock_net(sk), net) && sk->sk_num == hnum &&
    1                         !ipv6_only_sock(sk)) {
    1                 if (sk->sk_rcv_saddr != daddr)
                              return -1;
      
    1                 if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif))
                              return -1;
      
    1                 score = sk->sk_family == PF_INET ? 2 : 1;
    1                 if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
                              score++;
              }
              return score;
      }
      
      /*
        * There are some nice properties to exploit here. The BSD API
       * does not allow a listening sock to specify the remote port nor the
       * remote address for the connection. So always assume those are both
       * wildcarded during the search since they can never be otherwise.
       */
      
      /* called with rcu_read_lock() : No refcount taken on the socket */
      static struct sock *inet_lhash2_lookup(struct net *net,
                                      struct inet_listen_hashbucket *ilb2,
                                      struct sk_buff *skb, int doff,
                                      const __be32 saddr, __be16 sport,
                                      const __be32 daddr, const unsigned short hnum,
                                      const int dif, const int sdif)
      {
              bool exact_dif = inet_exact_dif_match(net, skb);
              struct inet_connection_sock *icsk;
              struct sock *sk, *result = NULL;
              int score, hiscore = 0;
              u32 phash = 0;
      
    2         inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
                      sk = (struct sock *)icsk;
    2                 score = compute_score(sk, net, hnum, daddr,
                                            dif, sdif, exact_dif);
    1                 if (score > hiscore) {
    1                         if (sk->sk_reuseport) {
                                      phash = inet_ehashfn(net, daddr, hnum,
                                                           saddr, sport);
                                      result = reuseport_select_sock(sk, phash,
                                                                     skb, doff);
                                      if (result)
                                              return result;
                              }
                              result = sk;
                              hiscore = score;
                      }
              }
      
              return result;
      }
      
      struct sock *__inet_lookup_listener(struct net *net,
                                          struct inet_hashinfo *hashinfo,
                                          struct sk_buff *skb, int doff,
                                          const __be32 saddr, __be16 sport,
                                          const __be32 daddr, const unsigned short hnum,
                                          const int dif, const int sdif)
      {
              struct inet_listen_hashbucket *ilb2;
              struct sock *result = NULL;
              unsigned int hash2;
      
    2         hash2 = ipv4_portaddr_hash(net, daddr, hnum);
              ilb2 = inet_lhash2_bucket(hashinfo, hash2);
      
              result = inet_lhash2_lookup(net, ilb2, skb, doff,
                                          saddr, sport, daddr, hnum,
                                          dif, sdif);
              if (result)
                      goto done;
      
              /* Lookup lhash2 with INADDR_ANY */
    2         hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
              ilb2 = inet_lhash2_bucket(hashinfo, hash2);
      
              result = inet_lhash2_lookup(net, ilb2, skb, doff,
                                          saddr, sport, htonl(INADDR_ANY), hnum,
                                          dif, sdif);
      done:
    2         if (IS_ERR(result))
    2                 return NULL;
              return result;
      }
      EXPORT_SYMBOL_GPL(__inet_lookup_listener);
      
       /* All sockets share a common refcount, but have different destructors */
      void sock_gen_put(struct sock *sk)
      {
   18         if (!refcount_dec_and_test(&sk->sk_refcnt))
                      return;
      
              if (sk->sk_state == TCP_TIME_WAIT)
                      inet_twsk_free(inet_twsk(sk));
              else if (sk->sk_state == TCP_NEW_SYN_RECV)
                      reqsk_free(inet_reqsk(sk));
              else
                      sk_free(sk);
      }
      EXPORT_SYMBOL_GPL(sock_gen_put);
      
      void sock_edemux(struct sk_buff *skb)
      {
              sock_gen_put(skb->sk);
      }
      EXPORT_SYMBOL(sock_edemux);
      
      struct sock *__inet_lookup_established(struct net *net,
                                        struct inet_hashinfo *hashinfo,
                                        const __be32 saddr, const __be16 sport,
                                        const __be32 daddr, const u16 hnum,
                                        const int dif, const int sdif)
      {
    2         INET_ADDR_COOKIE(acookie, saddr, daddr);
              const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
              struct sock *sk;
              const struct hlist_nulls_node *node;
              /* Optimize here for direct hit, only listening connections can
                * have wildcards anyway.
               */
              unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport);
              unsigned int slot = hash & hashinfo->ehash_mask;
              struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
      
      begin:
    2         sk_nulls_for_each_rcu(sk, node, &head->chain) {
                      if (sk->sk_hash != hash)
                              continue;
                      if (likely(INET_MATCH(sk, net, acookie,
                                            saddr, daddr, ports, dif, sdif))) {
                              if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
                                      goto out;
                              if (unlikely(!INET_MATCH(sk, net, acookie,
                                                       saddr, daddr, ports,
                                                       dif, sdif))) {
                                      sock_gen_put(sk);
                                      goto begin;
                              }
                              goto found;
                      }
              }
              /*
               * if the nulls value we got at the end of this lookup is
                * not the expected one, we must restart the lookup.
               * We probably met an item that was moved to another chain.
               */
    2         if (get_nulls_value(node) != slot)
                      goto begin;
      out:
              sk = NULL;
      found:
    2         return sk;
      }
      EXPORT_SYMBOL_GPL(__inet_lookup_established);
      
      /* called with local bh disabled */
      static int __inet_check_established(struct inet_timewait_death_row *death_row,
                                          struct sock *sk, __u16 lport,
                                          struct inet_timewait_sock **twp)
      {
              struct inet_hashinfo *hinfo = death_row->hashinfo;
              struct inet_sock *inet = inet_sk(sk);
              __be32 daddr = inet->inet_rcv_saddr;
              __be32 saddr = inet->inet_daddr;
              int dif = sk->sk_bound_dev_if;
              struct net *net = sock_net(sk);
              int sdif = l3mdev_master_ifindex_by_index(net, dif);
              INET_ADDR_COOKIE(acookie, saddr, daddr);
              const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
              unsigned int hash = inet_ehashfn(net, daddr, lport,
                                               saddr, inet->inet_dport);
              struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
              spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
              struct sock *sk2;
              const struct hlist_nulls_node *node;
              struct inet_timewait_sock *tw = NULL;
      
              spin_lock(lock);
      
              sk_nulls_for_each(sk2, node, &head->chain) {
                      if (sk2->sk_hash != hash)
                              continue;
      
                      if (likely(INET_MATCH(sk2, net, acookie,
                                               saddr, daddr, ports, dif, sdif))) {
                              if (sk2->sk_state == TCP_TIME_WAIT) {
                                      tw = inet_twsk(sk2);
                                      if (twsk_unique(sk, sk2, twp))
                                              break;
                              }
                              goto not_unique;
                      }
              }
      
               /* Must record num and sport now. Otherwise we will see in the
                * hash table a socket with a funny identity.
               */
              inet->inet_num = lport;
              inet->inet_sport = htons(lport);
              sk->sk_hash = hash;
              WARN_ON(!sk_unhashed(sk));
              __sk_nulls_add_node_rcu(sk, &head->chain);
              if (tw) {
                      sk_nulls_del_node_init_rcu((struct sock *)tw);
                      __NET_INC_STATS(net, LINUX_MIB_TIMEWAITRECYCLED);
              }
              spin_unlock(lock);
              sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
      
              if (twp) {
                      *twp = tw;
              } else if (tw) {
                      /* Silly. Should hash-dance instead... */
                      inet_twsk_deschedule_put(tw);
              }
              return 0;
      
      not_unique:
              spin_unlock(lock);
              return -EADDRNOTAVAIL;
      }
      
      static u32 inet_sk_port_offset(const struct sock *sk)
      {
              const struct inet_sock *inet = inet_sk(sk);
      
              return secure_ipv4_port_ephemeral(inet->inet_rcv_saddr,
                                                inet->inet_daddr,
   57                                           inet->inet_dport);
      }
      
       /* Insert a socket into ehash, and eventually remove another one
        * (the other one can be a SYN_RECV or TIMEWAIT socket).
        */
      bool inet_ehash_insert(struct sock *sk, struct sock *osk)
      {
  173         struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
              struct hlist_nulls_head *list;
              struct inet_ehash_bucket *head;
              spinlock_t *lock;
              bool ret = true;
      
              WARN_ON_ONCE(!sk_unhashed(sk));
      
  173         sk->sk_hash = sk_ehashfn(sk);
  173         head = inet_ehash_bucket(hashinfo, sk->sk_hash);
              list = &head->chain;
              lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
      
              spin_lock(lock);
              if (osk) {
                      WARN_ON_ONCE(sk->sk_hash != osk->sk_hash);
                      ret = sk_nulls_del_node_init_rcu(osk);
              }
              if (ret)
                      __sk_nulls_add_node_rcu(sk, list);
  173         spin_unlock(lock);
              return ret;
      }
      
      bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
      {
  173         bool ok = inet_ehash_insert(sk, osk);
      
              if (ok) {
  173                 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
              } else {
                      percpu_counter_inc(sk->sk_prot->orphan_count);
                      inet_sk_set_state(sk, TCP_CLOSE);
                      sock_set_flag(sk, SOCK_DEAD);
                      inet_csk_destroy_sock(sk);
              }
  173         return ok;
      }
      EXPORT_SYMBOL_GPL(inet_ehash_nolisten);
      
      static int inet_reuseport_add_sock(struct sock *sk,
                                         struct inet_listen_hashbucket *ilb)
      {
              struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash;
              const struct hlist_nulls_node *node;
              struct sock *sk2;
              kuid_t uid = sock_i_uid(sk);