1r"""Computation of graph non-randomness."""
2
3import math
4
5import networkx as nx
6from networkx.utils import not_implemented_for
7
8__all__ = ["non_randomness"]
9
10
11@not_implemented_for("directed")
12@not_implemented_for("multigraph")
13@nx._dispatchable(edge_attrs="weight")
14def non_randomness(G, k=None, weight="weight"):
15 """Compute the non-randomness of a graph.
16
17 The first value $R_G$ is the sum of non-randomness values of all
18 edges within the graph (where the non-randomness of an edge tends to be
19 small when the two nodes linked by that edge are from two different
20 communities).
21
22 The second value $R_G^*$ is a relative measure that indicates
23 to what extent `G` is different from a random graph in terms
24 of probability. The closer it is to 0, the higher the likelihood
25 the graph was generated by an Erdős--Rényi model.
26
27 Parameters
28 ----------
29 G : NetworkX graph
30 Graph must be undirected, connected, and without self-loops.
31
32 k : int or None, optional (default=None)
33 The number of communities in `G`.
34 If `k` is not set, the function uses a default community detection
35 algorithm (:func:`~networkx.algorithms.community.label_propagation_communities`)
36 to set it.
37
38 weight : string or None, optional (default="weight")
39 The name of an edge attribute that holds the numerical value used
40 as a weight. If `None`, then each edge has weight 1, i.e., the graph is
41 binary.
42
43 Returns
44 -------
45 (float, float) tuple
46 The first value is $R_G$, the non-randomness of the graph,
47 the second is $R_G^*$, the relative non-randomness
48 w.r.t. the Erdős--Rényi model.
49
50 Raises
51 ------
52 NetworkXNotImplemented
53 If the input graph is directed or a multigraph.
54
55 NetworkXException
56 If the input graph is not connected.
57
58 NetworkXError
59 If the input graph contains self-loops or has no edges.
60
61 ValueError
62 If `k` is not in $\\{1, \\dots, n-1\\}$, where $n$ is the number of nodes,
63 or if `k` is such that the computed edge probability
64 $p = \\frac{2km}{n(n-k)}$ does not satisfy $0 < p < 1$.
65
66 Examples
67 --------
68 >>> G = nx.karate_club_graph()
69 >>> nr, nr_rd = nx.non_randomness(G, 2)
70 >>> nr, nr_rd = nx.non_randomness(G, 2, "weight")
71
72 When the number of communities `k` is not specified,
73 :func:`~networkx.algorithms.community.label_propagation_communities`
74 is used to compute it.
75 This algorithm can give different results depending on
76 the order of nodes and edges in the graph.
77 For example, while the following graphs are identical,
78 computing the non-randomness of each of them yields different results:
79
80 >>> G1, G2 = nx.Graph(), nx.Graph()
81 >>> G1.add_edges_from([(0, 1), (1, 2), (1, 3), (3, 4)])
82 >>> G2.add_edges_from([(0, 1), (1, 3), (1, 2), (3, 4)])
83 >>> [round(r, 6) for r in nx.non_randomness(G1)]
84 [-1.847759, -5.842437]
85 >>> [round(r, 6) for r in nx.non_randomness(G2)]
86 Traceback (most recent call last):
87 ...
88 ValueError: invalid number of communities for graph with 5 nodes and 4 edges: 2
89
90 This is because the community detection algorithm finds
91 1 community in `G1` and 2 communities in `G2`.
92 This can be resolved by specifying the number of communities `k`:
93
94 >>> [round(r, 6) for r in nx.non_randomness(G2, k=1)]
95 [-1.847759, -5.842437]
96
97 Notes
98 -----
99 If a `weight` argument is passed, this algorithm will use the eigenvalues
100 of the weighted adjacency matrix instead.
101
102 The output of this function corresponds to (4.4) and (4.5) in [1]_.
103 A lower value of $R^*_G$ indicates a more random graph;
104 one can think of $1 - \\Phi(R_G^*)$ as the similarity
105 between the graph and a random graph,
106 where $\\Phi(x)$ is the cumulative distribution function
107 of the standard normal distribution.
108
109 Theorem 2 in [2]_ states that for any graph $G$
110 with $n$ nodes, $m$ edges, and $k$ communities,
111 its non-randomness is bounded below by the non-randomness of an
112 $r$-regular graph (a graph where each node has degree $r$),
113 and bounded above by the non-randomness of an $l$-complete graph
114 (a graph where each community is a clique of $l$ nodes).
115
116 References
117 ----------
118 .. [1] Xiaowei Ying and Xintao Wu,
119 On Randomness Measures for Social Networks,
120 SIAM International Conference on Data Mining. 2009
121 https://doi.org/10.1137/1.9781611972795.61
122 .. [2] Ying, Xiaowei & Wu, Leting & Wu, Xintao. (2012).
123 A Spectrum-Based Framework for Quantifying Randomness of Social Networks.
124 IEEE Transactions on Knowledge and Data Engineering 23(12):1842--1856.
125 https://dl.acm.org/doi/abs/10.1109/TKDE.2010.218
126 """
127 import numpy as np
128
129 # corner case: graph has no edges
130 if nx.is_empty(G):
131 raise nx.NetworkXError("non_randomness not applicable to empty graphs")
132 if not nx.is_connected(G):
133 raise nx.NetworkXException("Non connected graph.")
134 if len(list(nx.selfloop_edges(G))) > 0:
135 raise nx.NetworkXError("Graph must not contain self-loops")
136
137 n = G.number_of_nodes()
138 m = G.number_of_edges()
139
140 if k is None:
141 k = len(tuple(nx.community.label_propagation_communities(G)))
142 if not 1 <= k < n or not 0 < (p := (2 * k * m) / (n * (n - k))) < 1:
143 err = (
144 f"invalid number of communities for graph with {n} nodes and {m} edges: {k}"
145 )
146 raise ValueError(err)
147
148 # eq. 4.4
149 eigenvalues = np.linalg.eigvals(nx.to_numpy_array(G, weight=weight))
150 nr = float(np.real(np.sum(eigenvalues[:k])))
151
152 # eq. 4.5
153 nr_rd = (nr - ((n - 2 * k) * p + k)) / math.sqrt(2 * k * p * (1 - p))
154
155 return nr, nr_rd