1"""Functions for computing rich-club coefficients."""
2
3from itertools import accumulate
4
5import networkx as nx
6from networkx.utils import not_implemented_for
7
8__all__ = ["rich_club_coefficient"]
9
10
11@not_implemented_for("directed")
12@not_implemented_for("multigraph")
13@nx._dispatchable
14def rich_club_coefficient(G, normalized=True, Q=100, seed=None):
15 r"""Returns the rich-club coefficient of the graph `G`.
16
17 For each degree *k*, the *rich-club coefficient* is the ratio of the
18 number of actual to the number of potential edges for nodes with
19 degree greater than *k*:
20
21 .. math::
22
23 \phi(k) = \frac{2 E_k}{N_k (N_k - 1)}
24
25 where `N_k` is the number of nodes with degree larger than *k*, and
26 `E_k` is the number of edges among those nodes.
27
28 Parameters
29 ----------
30 G : NetworkX graph
31 Undirected graph with neither parallel edges nor self-loops.
32 normalized : bool (optional)
33 Normalize using randomized network as in [1]_
34 Q : float (optional, default=100)
35 If `normalized` is True, perform `Q * m` double-edge
36 swaps, where `m` is the number of edges in `G`, to use as a
37 null-model for normalization.
38 seed : integer, random_state, or None (default)
39 Indicator of random number generation state.
40 See :ref:`Randomness<randomness>`.
41
42 Returns
43 -------
44 rc : dictionary
45 A dictionary, keyed by degree, with rich-club coefficient values.
46
47 Raises
48 ------
49 NetworkXError
50 If `G` has fewer than four nodes and ``normalized=True``.
51 A randomly sampled graph for normalization cannot be generated in this case.
52
53 Examples
54 --------
55 >>> G = nx.Graph([(0, 1), (0, 2), (1, 2), (1, 3), (1, 4), (4, 5)])
56 >>> rc = nx.rich_club_coefficient(G, normalized=False, seed=42)
57 >>> rc[0]
58 0.4
59
60 Notes
61 -----
62 The rich club definition and algorithm are found in [1]_. This
63 algorithm ignores any edge weights and is not defined for directed
64 graphs or graphs with parallel edges or self loops.
65
66 Normalization is done by computing the rich club coefficient for a randomly
67 sampled graph with the same degree distribution as `G` by
68 repeatedly swapping the endpoints of existing edges. For graphs with fewer than 4
69 nodes, it is not possible to generate a random graph with a prescribed
70 degree distribution, as the degree distribution fully determines the graph
71 (hence making the coefficients trivially normalized to 1).
72 This function raises an exception in this case.
73
74 Estimates for appropriate values of `Q` are found in [2]_.
75
76 References
77 ----------
78 .. [1] Julian J. McAuley, Luciano da Fontoura Costa,
79 and Tibério S. Caetano,
80 "The rich-club phenomenon across complex network hierarchies",
81 Applied Physics Letters Vol 91 Issue 8, August 2007.
82 https://arxiv.org/abs/physics/0701290
83 .. [2] R. Milo, N. Kashtan, S. Itzkovitz, M. E. J. Newman, U. Alon,
84 "Uniform generation of random graphs with arbitrary degree
85 sequences", 2006. https://arxiv.org/abs/cond-mat/0312028
86 """
87 if nx.number_of_selfloops(G) > 0:
88 raise Exception(
89 "rich_club_coefficient is not implemented for graphs with self loops."
90 )
91 rc = _compute_rc(G)
92 if normalized:
93 # make R a copy of G, randomize with Q*|E| double edge swaps
94 # and use rich_club coefficient of R to normalize
95 R = G.copy()
96 E = R.number_of_edges()
97 nx.double_edge_swap(R, Q * E, max_tries=Q * E * 10, seed=seed)
98 rcran = _compute_rc(R)
99 rc = {k: v / rcran[k] for k, v in rc.items()}
100 return rc
101
102
103def _compute_rc(G):
104 """Returns the rich-club coefficient for each degree in the graph
105 `G`.
106
107 `G` is an undirected graph without multiedges.
108
109 Returns a dictionary mapping degree to rich-club coefficient for
110 that degree.
111
112 """
113 deghist = nx.degree_histogram(G)
114 total = sum(deghist)
115 # Compute the number of nodes with degree greater than `k`, for each
116 # degree `k` (omitting the last entry, which is zero).
117 nks = (total - cs for cs in accumulate(deghist) if total - cs > 1)
118 # Create a sorted list of pairs of edge endpoint degrees.
119 #
120 # The list is sorted in reverse order so that we can pop from the
121 # right side of the list later, instead of popping from the left
122 # side of the list, which would have a linear time cost.
123 edge_degrees = sorted((sorted(map(G.degree, e)) for e in G.edges()), reverse=True)
124 ek = G.number_of_edges()
125 if ek == 0:
126 return {}
127
128 k1, k2 = edge_degrees.pop()
129 rc = {}
130 for d, nk in enumerate(nks):
131 while k1 <= d:
132 if len(edge_degrees) == 0:
133 ek = 0
134 break
135 k1, k2 = edge_degrees.pop()
136 ek -= 1
137 rc[d] = 2 * ek / (nk * (nk - 1))
138 return rc