1"""Current-flow betweenness centrality measures."""
2
3import networkx as nx
4from networkx.algorithms.centrality.flow_matrix import (
5 CGInverseLaplacian,
6 FullInverseLaplacian,
7 SuperLUInverseLaplacian,
8 flow_matrix_row,
9)
10from networkx.utils import (
11 not_implemented_for,
12 py_random_state,
13 reverse_cuthill_mckee_ordering,
14)
15
16__all__ = [
17 "current_flow_betweenness_centrality",
18 "approximate_current_flow_betweenness_centrality",
19 "edge_current_flow_betweenness_centrality",
20]
21
22
23@not_implemented_for("directed")
24@py_random_state("seed")
25@nx._dispatchable(edge_attrs="weight")
def approximate_current_flow_betweenness_centrality(
    G,
    normalized=True,
    weight=None,
    dtype=float,
    solver="full",
    epsilon=0.5,
    kmax=10000,
    seed=None,
    *,
    sample_weight=1,
):
    r"""Compute the approximate current-flow betweenness centrality for nodes.

    Approximates the current-flow betweenness centrality within absolute
    error of epsilon with high probability [1]_.

    Parameters
    ----------
    G : graph
        A NetworkX graph

    normalized : bool, optional (default=True)
        If True the betweenness values are normalized by 2/[(n-1)(n-2)] where
        n is the number of nodes in G.

    weight : string or None, optional (default=None)
        Key for edge data used as the edge weight.
        If None, then use 1 as each edge weight.
        The weight reflects the capacity or the strength of the
        edge.

    dtype : data type (default=float)
        Default data type for internal matrices.
        Set to np.float32 for lower memory consumption.

    solver : string (default='full')
        Type of linear solver to use for computing the flow matrix.
        Options are "full" (uses most memory), "lu" (recommended), and
        "cg" (uses least memory).

    epsilon : float
        Absolute error tolerance. Note that smaller values of `epsilon` lead to
        higher numbers of sample pairs (``k``) and thus more computation time.
        The number of sample pairs is approximately ``(c/epsilon)^2 * log(n)``
        where ``n`` is the number of nodes.

    kmax : int
        Maximum number of sample node pairs to use for approximation.

    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.

    sample_weight : float (default=1)
        Multiplicative factor for the number of sample node pairs used in the
        approximation. Higher values may improve accuracy at the expense of
        increased computation time.

    Returns
    -------
    nodes : dictionary
        Dictionary of nodes with betweenness centrality as the value.

    See Also
    --------
    current_flow_betweenness_centrality

    Notes
    -----
    The running time is $O((1/\epsilon^2)m{\sqrt k} \log n)$
    and the space required is $O(m)$ for $n$ nodes and $m$ edges.

    If the edges have a 'weight' attribute they will be used as
    weights in this algorithm. Unspecified weights are set to 1.

    References
    ----------
    .. [1] Ulrik Brandes and Daniel Fleischer:
       Centrality Measures Based on Current Flow.
       Proc. 22nd Symp. Theoretical Aspects of Computer Science (STACS '05).
       LNCS 3404, pp. 533-544. Springer-Verlag, 2005.
       https://doi.org/10.1007/978-3-540-31856-9_44
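
    Examples
    --------
    A minimal illustrative run on a small path graph. The returned values are
    random approximations (they vary with the sampled node pairs), so only the
    node set of the result is checked here:

    >>> G = nx.path_graph(6)
    >>> approx = nx.approximate_current_flow_betweenness_centrality(G, seed=42)
    >>> sorted(approx) == sorted(G)
    True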
    """
    import numpy as np

    if not nx.is_connected(G):
        raise nx.NetworkXError("Graph not connected.")

    n = G.number_of_nodes()

    # For small graphs (n < 3), betweenness centrality is always 0 for all nodes
    # since no node can be "between" any pair of other nodes
    if n < 3:
        return dict.fromkeys(G, 0.0)

    if epsilon <= 0:
        raise nx.NetworkXError(f"Epsilon must be positive. Got {epsilon=}.")

    if sample_weight <= 0:
        raise nx.NetworkXError(f"Sample weight must be positive. Got {sample_weight=}.")

    nb = (n - 1.0) * (n - 2.0)  # normalization factor
    cstar = n * (n - 1) / nb
    k = int(sample_weight * np.ceil((cstar / epsilon) ** 2 * np.log(n)))
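    # e.g. with the defaults (epsilon=0.5, sample_weight=1), a 100-node graph has
    # cstar ~ 1.02 and uses k = ceil((1.02 / 0.5) ** 2 * log(100)) = 20 sample pairs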
    if k > kmax:
        msg = f"Number of sample pairs k>kmax ({k}>{kmax}). Increase kmax or epsilon."
        raise nx.NetworkXError(msg)

    solvername = {
        "full": FullInverseLaplacian,
        "lu": SuperLUInverseLaplacian,
        "cg": CGInverseLaplacian,
    }
    ordering = list(reverse_cuthill_mckee_ordering(G))
    # make a copy with integer labels according to rcm ordering
    # this could be done without a copy if we really wanted to
    H = nx.relabel_nodes(G, dict(zip(ordering, range(n))))
    L = nx.laplacian_matrix(H, nodelist=range(n), weight=weight).asformat("csc")
    L = L.astype(dtype)
    C = solvername[solver](L, dtype=dtype)  # initialize solver
    betweenness = dict.fromkeys(H, 0.0)
    cstar2k = cstar / (2 * k)
    for _ in range(k):
        s, t = pair = seed.sample(range(n), 2)
        b = np.zeros(n, dtype=dtype)
        b[s] = 1
        b[t] = -1
        p = C.solve(b)
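        # p[v] is the electric potential of node v when a unit current is
        # injected at s and extracted at t; the current on edge (v, nbr) with
        # conductance w is w * (p[v] - p[nbr]). Summing absolute currents over
        # all edges incident to v counts the throughput of v twice, hence the
        # factor 1/2 folded into cstar2k above.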
        for v in H:
            if v in pair:
                continue
            for nbr in H[v]:
                w = H[v][nbr].get(weight, 1.0)
                betweenness[v] += float(w * np.abs(p[v] - p[nbr]) * cstar2k)
    if normalized:
        factor = 1.0
    else:
        factor = nb / 2.0
    # remap to original node names and "unnormalize" if required
    return {ordering[node]: value * factor for node, value in betweenness.items()}


@not_implemented_for("directed")
@nx._dispatchable(edge_attrs="weight")
def current_flow_betweenness_centrality(
    G, normalized=True, weight=None, dtype=float, solver="full"
):
    r"""Compute current-flow betweenness centrality for nodes.

    Current-flow betweenness centrality uses an electrical current
    model for information spreading in contrast to betweenness
    centrality which uses shortest paths.

    Current-flow betweenness centrality is also known as
    random-walk betweenness centrality [2]_.

    Parameters
    ----------
    G : graph
        A NetworkX graph

    normalized : bool, optional (default=True)
        If True the betweenness values are normalized by 2/[(n-1)(n-2)] where
        n is the number of nodes in G.

    weight : string or None, optional (default=None)
        Key for edge data used as the edge weight.
        If None, then use 1 as each edge weight.
        The weight reflects the capacity or the strength of the
        edge.

    dtype : data type (default=float)
        Default data type for internal matrices.
        Set to np.float32 for lower memory consumption.

    solver : string (default='full')
        Type of linear solver to use for computing the flow matrix.
        Options are "full" (uses most memory), "lu" (recommended), and
        "cg" (uses least memory).

    Returns
    -------
    nodes : dictionary
        Dictionary of nodes with betweenness centrality as the value.

    See Also
    --------
    approximate_current_flow_betweenness_centrality
    betweenness_centrality
    edge_betweenness_centrality
    edge_current_flow_betweenness_centrality

    Notes
    -----
    Current-flow betweenness can be computed in $O(I(n-1)+mn \log n)$
    time [1]_, where $I(n-1)$ is the time needed to compute the
    inverse Laplacian. For a full matrix this is $O(n^3)$ but using
    sparse methods you can achieve $O(nm{\sqrt k})$ where $k$ is the
    Laplacian matrix condition number.

    The space required is $O(nw)$ where $w$ is the width of the sparse
    Laplacian matrix. Worst case is $w=n$ for $O(n^2)$.

    If the edges have a 'weight' attribute they will be used as
    weights in this algorithm. Unspecified weights are set to 1.

    References
    ----------
    .. [1] Centrality Measures Based on Current Flow.
       Ulrik Brandes and Daniel Fleischer,
       Proc. 22nd Symp. Theoretical Aspects of Computer Science (STACS '05).
       LNCS 3404, pp. 533-544. Springer-Verlag, 2005.
       https://doi.org/10.1007/978-3-540-31856-9_44

    .. [2] A measure of betweenness centrality based on random walks,
       M. E. J. Newman, Social Networks 27, 39-54 (2005).
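
    Examples
    --------
    A short usage sketch. On a tree, the current between any pair of nodes
    follows the single path joining them, so the values should agree with
    shortest-path betweenness up to numerical error:

    >>> G = nx.path_graph(5)
    >>> cfbc = nx.current_flow_betweenness_centrality(G)
    >>> spbc = nx.betweenness_centrality(G)
    >>> all(abs(cfbc[v] - spbc[v]) < 1e-6 for v in G)
    True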
    """
    if not nx.is_connected(G):
        raise nx.NetworkXError("Graph not connected.")
    N = G.number_of_nodes()
    ordering = list(reverse_cuthill_mckee_ordering(G))
    # make a copy with integer labels according to rcm ordering
    # this could be done without a copy if we really wanted to
    H = nx.relabel_nodes(G, dict(zip(ordering, range(N))))
    betweenness = dict.fromkeys(H, 0.0)  # b[n] = 0 for n in H
    for row, (s, t) in flow_matrix_row(H, weight=weight, dtype=dtype, solver=solver):
        pos = dict(zip(row.argsort()[::-1], range(N)))
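        # pos[i] is the 0-based rank of node i when the entries of this row
        # are sorted in decreasing order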
        for i in range(N):
            betweenness[s] += (i - pos[i]) * row.item(i)
            betweenness[t] += (N - i - 1 - pos[i]) * row.item(i)
    if normalized:
        nb = (N - 1.0) * (N - 2.0)  # normalization factor
    else:
        nb = 2.0
    return {ordering[n]: (b - n) * 2.0 / nb for n, b in betweenness.items()}


@not_implemented_for("directed")
@nx._dispatchable(edge_attrs="weight")
def edge_current_flow_betweenness_centrality(
    G, normalized=True, weight=None, dtype=float, solver="full"
):
    r"""Compute current-flow betweenness centrality for edges.

    Current-flow betweenness centrality uses an electrical current
    model for information spreading in contrast to betweenness
    centrality which uses shortest paths.

    Current-flow betweenness centrality is also known as
    random-walk betweenness centrality [2]_.

    Parameters
    ----------
    G : graph
        A NetworkX graph

    normalized : bool, optional (default=True)
        If True the betweenness values are normalized by 2/[(n-1)(n-2)] where
        n is the number of nodes in G.

    weight : string or None, optional (default=None)
        Key for edge data used as the edge weight.
        If None, then use 1 as each edge weight.
        The weight reflects the capacity or the strength of the
        edge.

    dtype : data type (default=float)
        Default data type for internal matrices.
        Set to np.float32 for lower memory consumption.

    solver : string (default='full')
        Type of linear solver to use for computing the flow matrix.
        Options are "full" (uses most memory), "lu" (recommended), and
        "cg" (uses least memory).

    Returns
    -------
    edges : dictionary
        Dictionary of edge tuples with betweenness centrality as the value.

    Raises
    ------
    NetworkXError
        The algorithm does not support DiGraphs.
        If the input graph is an instance of DiGraph class, NetworkXError
        is raised.

    See Also
    --------
    betweenness_centrality
    edge_betweenness_centrality
    current_flow_betweenness_centrality

    Notes
    -----
    Current-flow betweenness can be computed in $O(I(n-1)+mn \log n)$
    time [1]_, where $I(n-1)$ is the time needed to compute the
    inverse Laplacian. For a full matrix this is $O(n^3)$ but using
    sparse methods you can achieve $O(nm{\sqrt k})$ where $k$ is the
    Laplacian matrix condition number.

    The space required is $O(nw)$ where $w$ is the width of the sparse
    Laplacian matrix. Worst case is $w=n$ for $O(n^2)$.

    If the edges have a 'weight' attribute they will be used as
    weights in this algorithm. Unspecified weights are set to 1.

    References
    ----------
    .. [1] Centrality Measures Based on Current Flow.
       Ulrik Brandes and Daniel Fleischer,
       Proc. 22nd Symp. Theoretical Aspects of Computer Science (STACS '05).
       LNCS 3404, pp. 533-544. Springer-Verlag, 2005.
       https://doi.org/10.1007/978-3-540-31856-9_44

    .. [2] A measure of betweenness centrality based on random walks,
       M. E. J. Newman, Social Networks 27, 39-54 (2005).
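
    Examples
    --------
    A brief usage sketch. The result maps each edge, as a node-pair tuple, to
    its centrality, so there is one entry per edge of the graph:

    >>> G = nx.cycle_graph(4)
    >>> ecf = nx.edge_current_flow_betweenness_centrality(G)
    >>> len(ecf) == G.number_of_edges()
    True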
    """
    if not nx.is_connected(G):
        raise nx.NetworkXError("Graph not connected.")
    N = G.number_of_nodes()
    ordering = list(reverse_cuthill_mckee_ordering(G))
    # make a copy with integer labels according to rcm ordering
    # this could be done without a copy if we really wanted to
    H = nx.relabel_nodes(G, dict(zip(ordering, range(N))))
    edges = (tuple(sorted((u, v))) for u, v in H.edges())
    betweenness = dict.fromkeys(edges, 0.0)
    if normalized:
        nb = (N - 1.0) * (N - 2.0)  # normalization factor
    else:
        nb = 2.0
    for row, e in flow_matrix_row(H, weight=weight, dtype=dtype, solver=solver):
        pos = dict(zip(row.argsort()[::-1], range(1, N + 1)))
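        # pos[i] is the 1-based rank of node i when the entries of this row
        # are sorted in decreasing order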
        for i in range(N):
            betweenness[e] += (i + 1 - pos[i]) * row.item(i)
            betweenness[e] += (N - i - pos[i]) * row.item(i)
        betweenness[e] /= nb
    return {(ordering[s], ordering[t]): b for (s, t), b in betweenness.items()}