1"""PageRank analysis of graph structure."""
2
3import networkx as nx
4
5__all__ = ["pagerank", "google_matrix"]
6
7
@nx._dispatchable(edge_attrs="weight")
def pagerank(
    G,
    alpha=0.85,
    personalization=None,
    max_iter=100,
    tol=1.0e-6,
    nstart=None,
    weight="weight",
    dangling=None,
):
    """Compute the PageRank of every node in the graph.

    PageRank scores nodes by the structure of their incoming links; the
    algorithm was originally conceived as a ranking method for web pages.

    Parameters
    ----------
    G : graph
        A NetworkX graph.  An undirected graph is treated as directed, with
        each undirected edge replaced by a pair of opposite directed edges.

    alpha : float, optional
        Damping parameter for PageRank, default=0.85.

    personalization : dict, optional
        The "personalization vector": a dictionary mapping a subset of the
        graph's nodes to personalization values, at least one of which must
        be non-zero.  Nodes missing from the dictionary get a value of zero.
        If the dictionary itself is omitted, a uniform distribution is used.

    max_iter : integer, optional
        Maximum number of power-method iterations.

    tol : float, optional
        Error tolerance for the convergence check of the power-method
        solver; iteration stops once the error drops below ``len(G) * tol``.

    nstart : dictionary, optional
        Per-node starting values for the PageRank iteration.

    weight : key, optional
        Edge data key to use as weight.  If None, all weights are taken as 1.

    dangling : dict, optional
        Out-edges to assign to "dangling" nodes, i.e. nodes with no
        out-edges.  Each key is the node an out-edge points to, and the
        value is that edge's weight.  By default dangling nodes receive
        out-edges according to the personalization vector (uniform if none
        was given).  The choice must yield an irreducible transition matrix
        (see the notes in ``google_matrix``); it is common for the dangling
        dict to coincide with the personalization dict.

    Returns
    -------
    pagerank : dictionary
        Mapping from node to its PageRank value.

    Examples
    --------
    >>> G = nx.DiGraph(nx.path_graph(4))
    >>> pr = nx.pagerank(G, alpha=0.9)

    Notes
    -----
    The dominant eigenvector is found by power iteration, which carries no
    convergence guarantee.  Iteration halts once the error falls below
    ``len(G) * tol``; if `max_iter` iterations pass first, a
    :exc:`networkx.exception.PowerIterationFailedConvergence` is raised.

    Although PageRank was designed for directed graphs, no directedness
    check is performed: an undirected graph is simply handled by turning
    every edge into two opposite directed edges.

    See Also
    --------
    google_matrix
    :func:`~networkx.algorithms.bipartite.link_analysis.birank`

    Raises
    ------
    PowerIterationFailedConvergence
        If the power iteration does not reach the requested tolerance
        within the allowed number of iterations.

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
       The PageRank citation ranking: Bringing order to the Web. 1999
       http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf

    """
    # Delegate to the SciPy-based sparse power-iteration implementation.
    return _pagerank_scipy(
        G,
        alpha=alpha,
        personalization=personalization,
        max_iter=max_iter,
        tol=tol,
        nstart=nstart,
        weight=weight,
        dangling=dangling,
    )
113
114
def _pagerank_python(
    G,
    alpha=0.85,
    personalization=None,
    max_iter=100,
    tol=1.0e-6,
    nstart=None,
    weight="weight",
    dangling=None,
):
    """Pure-Python power-iteration PageRank (reference implementation)."""
    if len(G) == 0:
        return {}

    # Operate on a right-stochastic directed copy so every row of the
    # implied transition matrix sums to one.
    stoch = nx.stochastic_graph(G.to_directed(), weight=weight)
    n_nodes = stoch.number_of_nodes()

    def _normalized(d):
        # Rescale a node -> value mapping so its values sum to one.
        total = sum(d.values())
        return {node: val / total for node, val in d.items()}

    # Starting vector: uniform unless the caller supplied one.
    if nstart is None:
        x = dict.fromkeys(stoch, 1.0 / n_nodes)
    else:
        x = _normalized(nstart)

    # Personalization vector: uniform unless the caller supplied one.
    if personalization is None:
        p = dict.fromkeys(stoch, 1.0 / n_nodes)
    else:
        p = _normalized(personalization)

    # Redistribution weights for dangling nodes default to personalization.
    dangling_weights = p if dangling is None else _normalized(dangling)
    dangling_nodes = [v for v in stoch if stoch.out_degree(v, weight=weight) == 0.0]

    # Power iteration, at most max_iter rounds.
    for _ in range(max_iter):
        xlast = x
        x = dict.fromkeys(xlast, 0)
        danglesum = alpha * sum(xlast[v] for v in dangling_nodes)
        for v in x:
            # Left multiplication x^T = xlast^T * W, one edge at a time,
            # which is why rank flows from v to its successors here.
            for _, nbr, wt in stoch.edges(v, data=weight):
                x[nbr] += alpha * xlast[v] * wt
            x[v] += danglesum * dangling_weights.get(v, 0) + (1.0 - alpha) * p.get(v, 0)
        # Convergence test in the L1 norm.
        if sum(abs(x[v] - xlast[v]) for v in x) < n_nodes * tol:
            return x
    raise nx.PowerIterationFailedConvergence(max_iter)
173
174
@nx._dispatchable(edge_attrs="weight")
def google_matrix(
    G, alpha=0.85, personalization=None, nodelist=None, weight="weight", dangling=None
):
    """Compute the Google matrix of the graph.

    Parameters
    ----------
    G : graph
        A NetworkX graph.  An undirected graph is treated as directed, with
        each undirected edge replaced by a pair of opposite directed edges.

    alpha : float
        The damping factor.

    personalization : dict, optional
        The "personalization vector": a dictionary mapping a subset of the
        graph's nodes to personalization values, at least one of which must
        be non-zero.  Nodes missing from the dictionary get a value of zero.
        If the dictionary itself is omitted, a uniform distribution is used.

    nodelist : list, optional
        Row/column ordering of the returned matrix.  If None, the ordering
        is taken from ``G.nodes()``.

    weight : key, optional
        Edge data key to use as weight.  If None, all weights are taken as 1.

    dangling : dict, optional
        Out-edges to assign to "dangling" nodes, i.e. nodes with no
        out-edges.  Each key is the node an out-edge points to, and the
        value is that edge's weight.  By default dangling nodes receive
        out-edges according to the personalization vector (uniform if none
        was given).  The choice must yield an irreducible transition matrix
        (see notes below); it is common for the dangling dict to coincide
        with the personalization dict.

    Returns
    -------
    A : 2D NumPy ndarray
        Google matrix of the graph.

    Notes
    -----
    The returned array is the transition matrix of the Markov chain behind
    PageRank.  For PageRank to have a unique solution (a unique stationary
    distribution), the matrix must be irreducible: every pair of nodes must
    be connected by some path, otherwise "rank sinks" can occur.

    Multi(Di)Graphs are supported; for multigraphs the weight between two
    nodes is the sum of all parallel edge weights between them.

    See Also
    --------
    pagerank
    """
    import numpy as np

    if nodelist is None:
        nodelist = list(G)

    M = nx.to_numpy_array(G, nodelist=nodelist, weight=weight)
    n = len(G)
    if n == 0:
        # Degenerate empty graph: nothing to normalize.
        return M

    # Personalization vector over the chosen node ordering.
    if personalization is None:
        p = np.full(n, 1.0 / n)
    else:
        p = np.array([personalization.get(v, 0) for v in nodelist], dtype=float)
        if p.sum() == 0:
            raise ZeroDivisionError
        p /= p.sum()

    # Distribution assigned to dangling rows (nodes with no out-edges).
    if dangling is None:
        dangling_weights = p
    else:
        # Lay the dangling dictionary out in nodelist order.
        dangling_weights = np.array([dangling.get(v, 0) for v in nodelist], dtype=float)
        dangling_weights /= dangling_weights.sum()

    no_out_edges = np.where(M.sum(axis=1) == 0)[0]
    M[no_out_edges] = dangling_weights

    # Make every row a probability distribution.
    M /= M.sum(axis=1)[:, np.newaxis]

    return alpha * M + (1 - alpha) * p
269
270
def _pagerank_numpy(
    G, alpha=0.85, personalization=None, weight="weight", dangling=None
):
    """Compute the PageRank of every node in the graph.

    PageRank scores nodes by the structure of their incoming links; the
    algorithm was originally conceived as a ranking method for web pages.

    Parameters
    ----------
    G : graph
        A NetworkX graph.  An undirected graph is treated as directed, with
        each undirected edge replaced by a pair of opposite directed edges.

    alpha : float, optional
        Damping parameter for PageRank, default=0.85.

    personalization : dict, optional
        The "personalization vector": a dictionary mapping a subset of the
        graph's nodes to personalization values, at least one of which must
        be non-zero.  Nodes missing from the dictionary get a value of zero.
        If the dictionary itself is omitted, a uniform distribution is used.

    weight : key, optional
        Edge data key to use as weight.  If None, all weights are taken as 1.

    dangling : dict, optional
        Out-edges to assign to "dangling" nodes, i.e. nodes with no
        out-edges.  Each key is the node an out-edge points to, and the
        value is that edge's weight.  By default dangling nodes receive
        out-edges according to the personalization vector (uniform if none
        was given).  The choice must yield an irreducible transition matrix
        (see the notes in ``google_matrix``); it is common for the dangling
        dict to coincide with the personalization dict.

    Returns
    -------
    pagerank : dictionary
        Mapping from node to its PageRank value.

    Examples
    --------
    >>> from networkx.algorithms.link_analysis.pagerank_alg import _pagerank_numpy
    >>> G = nx.DiGraph(nx.path_graph(4))
    >>> pr = _pagerank_numpy(G, alpha=0.9)

    Notes
    -----
    The eigenvector is computed through NumPy's interface to the LAPACK
    eigenvalue solvers — fastest and most accurate for small graphs.

    Multi(Di)Graphs are supported; for multigraphs the weight between two
    nodes is the sum of all parallel edge weights between them.

    See Also
    --------
    pagerank, google_matrix

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
       The PageRank citation ranking: Bringing order to the Web. 1999
       http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
    """
    import numpy as np

    if len(G) == 0:
        return {}
    M = google_matrix(
        G, alpha, personalization=personalization, weight=weight, dangling=dangling
    )
    # Dense LAPACK eigendecomposition of the transposed Google matrix.
    eigenvalues, eigenvectors = np.linalg.eig(M.T)
    dominant = np.argmax(eigenvalues)
    # The stationary distribution is the (real part of the) eigenvector of
    # the largest eigenvalue, rescaled so its entries sum to one.
    vec = np.array(eigenvectors[:, dominant]).flatten().real
    return dict(zip(G, map(float, vec / vec.sum())))
356
357
def _pagerank_scipy(
    G,
    alpha=0.85,
    personalization=None,
    max_iter=100,
    tol=1.0e-6,
    nstart=None,
    weight="weight",
    dangling=None,
):
    """Compute the PageRank of every node in the graph.

    PageRank scores nodes by the structure of their incoming links; the
    algorithm was originally conceived as a ranking method for web pages.

    Parameters
    ----------
    G : graph
        A NetworkX graph.  An undirected graph is treated as directed, with
        each undirected edge replaced by a pair of opposite directed edges.

    alpha : float, optional
        Damping parameter for PageRank, default=0.85.

    personalization : dict, optional
        The "personalization vector": a dictionary mapping a subset of the
        graph's nodes to personalization values, at least one of which must
        be non-zero.  Nodes missing from the dictionary get a value of zero.
        If the dictionary itself is omitted, a uniform distribution is used.

    max_iter : integer, optional
        Maximum number of power-method iterations.

    tol : float, optional
        Error tolerance for the convergence check of the power-method
        solver; iteration stops once the error drops below ``len(G) * tol``.

    nstart : dictionary, optional
        Per-node starting values for the PageRank iteration.

    weight : key, optional
        Edge data key to use as weight.  If None, all weights are taken as 1.

    dangling : dict, optional
        Out-edges to assign to "dangling" nodes, i.e. nodes with no
        out-edges.  Each key is the node an out-edge points to, and the
        value is that edge's weight.  By default dangling nodes receive
        out-edges according to the personalization vector (uniform if none
        was given).  The choice must yield an irreducible transition matrix
        (see the notes in ``google_matrix``); it is common for the dangling
        dict to coincide with the personalization dict.

    Returns
    -------
    pagerank : dictionary
        Mapping from node to its PageRank value.

    Examples
    --------
    >>> from networkx.algorithms.link_analysis.pagerank_alg import _pagerank_scipy
    >>> G = nx.DiGraph(nx.path_graph(4))
    >>> pr = _pagerank_scipy(G, alpha=0.9)

    Notes
    -----
    The eigenvector is computed by power iteration on a SciPy sparse-matrix
    representation of the graph.

    Multi(Di)Graphs are supported; for multigraphs the weight between two
    nodes is the sum of all parallel edge weights between them.

    See Also
    --------
    pagerank

    Raises
    ------
    PowerIterationFailedConvergence
        If the power iteration does not reach the requested tolerance
        within the allowed number of iterations.

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
       The PageRank citation ranking: Bringing order to the Web. 1999
       http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
    """
    import numpy as np
    import scipy as sp

    n = len(G)
    if n == 0:
        return {}

    nodelist = list(G)
    A = nx.to_scipy_sparse_array(G, nodelist=nodelist, weight=weight, dtype=float)

    # Invert the non-zero row sums, then scale each row by its inverse sum
    # so the adjacency matrix becomes right-stochastic.  Zero rows (dangling
    # nodes) stay zero and are handled separately below.
    row_sums = A.sum(axis=1)
    row_sums[row_sums != 0] = 1.0 / row_sums[row_sums != 0]
    scaler = sp.sparse.dia_array((row_sums.T, 0), shape=A.shape).tocsr()
    A = scaler @ A

    # Starting vector: uniform unless the caller supplied one.
    if nstart is None:
        x = np.full(n, 1.0 / n)
    else:
        x = np.array([nstart.get(v, 0) for v in nodelist], dtype=float)
        x /= x.sum()

    # Personalization vector: uniform unless the caller supplied one.
    if personalization is None:
        p = np.full(n, 1.0 / n)
    else:
        p = np.array([personalization.get(v, 0) for v in nodelist], dtype=float)
        if p.sum() == 0:
            raise ZeroDivisionError
        p /= p.sum()

    # Redistribution weights for dangling nodes default to personalization.
    if dangling is None:
        dangling_weights = p
    else:
        # Lay the dangling dictionary out in nodelist order.
        dangling_weights = np.array([dangling.get(v, 0) for v in nodelist], dtype=float)
        dangling_weights /= dangling_weights.sum()
    dangling_ix = np.where(row_sums == 0)[0]

    # Power iteration, at most max_iter rounds.
    for _ in range(max_iter):
        xlast = x
        # Rank from dangling nodes is spread according to dangling_weights.
        x = alpha * (x @ A + sum(x[dangling_ix]) * dangling_weights) + (1 - alpha) * p
        # Convergence test in the L1 norm.
        if np.absolute(x - xlast).sum() < n * tol:
            return dict(zip(nodelist, map(float, x)))
    raise nx.PowerIterationFailedConvergence(max_iter)