Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/networkx/algorithms/link_analysis/pagerank_alg.py: 10%
104 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-20 07:00 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-20 07:00 +0000
1"""PageRank analysis of graph structure. """
2from warnings import warn
4import networkx as nx
6__all__ = ["pagerank", "google_matrix"]
@nx._dispatch(edge_attrs="weight")
def pagerank(
    G,
    alpha=0.85,
    personalization=None,
    max_iter=100,
    tol=1.0e-6,
    nstart=None,
    weight="weight",
    dangling=None,
):
    """Returns the PageRank of the nodes in the graph.

    PageRank ranks the nodes of ``G`` by the structure of their incoming
    links; it was originally designed to rank web pages.

    Parameters
    ----------
    G : graph
        A NetworkX graph. Undirected graphs will be converted to a directed
        graph with two directed edges for each undirected edge.

    alpha : float, optional
        Damping parameter for PageRank, default=0.85.

    personalization : dict, optional
        The "personalization vector": a dict keyed by some subset of graph
        nodes with a personalization value for each. At least one value must
        be non-zero. Nodes not listed get a value of zero. By default, a
        uniform distribution is used.

    max_iter : integer, optional
        Maximum number of iterations in power method eigenvalue solver.

    tol : float, optional
        Error tolerance used to check convergence in power method solver.
        The iteration will stop after a tolerance of ``len(G) * tol`` is reached.

    nstart : dictionary, optional
        Starting value of PageRank iteration for each node.

    weight : key, optional
        Edge data key to use as weight. If None weights are set to 1.

    dangling : dict, optional
        The outedges to be assigned to any "dangling" nodes, i.e., nodes
        without any outedges. The dict key is the node the outedge points to
        and the dict value is the weight of that outedge. By default,
        dangling nodes are given outedges according to the personalization
        vector (uniform if not specified). This must be selected to result in
        an irreducible transition matrix (see notes under google_matrix). It
        may be common to have the dangling dict be the same as the
        personalization dict.

    Returns
    -------
    pagerank : dictionary
        Dictionary of nodes with PageRank as value

    Examples
    --------
    >>> G = nx.DiGraph(nx.path_graph(4))
    >>> pr = nx.pagerank(G, alpha=0.9)

    Notes
    -----
    The eigenvector calculation is done by the power iteration method and has
    no guarantee of convergence. The iteration stops once the l1 error drops
    below ``len(G) * tol``; if `max_iter` iterations pass first, a
    :exc:`networkx.exception.PowerIterationFailedConvergence` is raised.

    Although PageRank was designed for directed graphs, this function does
    not check directedness: an undirected graph is handled by replacing each
    edge with two directed edges.

    See Also
    --------
    google_matrix

    Raises
    ------
    PowerIterationFailedConvergence
        If the algorithm fails to converge to the specified tolerance
        within the specified number of iterations of the power iteration
        method.

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
       The PageRank citation ranking: Bringing order to the Web. 1999
       http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
    """
    # Delegate to the sparse power-iteration backend.
    return _pagerank_scipy(
        G,
        alpha=alpha,
        personalization=personalization,
        max_iter=max_iter,
        tol=tol,
        nstart=nstart,
        weight=weight,
        dangling=dangling,
    )
def _pagerank_python(
    G,
    alpha=0.85,
    personalization=None,
    max_iter=100,
    tol=1.0e-6,
    nstart=None,
    weight="weight",
    dangling=None,
):
    """Pure-Python PageRank by power iteration on a right-stochastic graph.

    Same contract as :func:`pagerank`; raises
    :exc:`networkx.PowerIterationFailedConvergence` if the l1 error has not
    dropped below ``len(G) * tol`` within `max_iter` iterations.
    """
    if len(G) == 0:
        return {}

    # Directed copy whose out-edge weights sum to 1 per node.
    W = nx.stochastic_graph(G.to_directed(), weight=weight)
    N = W.number_of_nodes()

    def normalized(d):
        # Rescale a dict's values so they sum to one.
        total = sum(d.values())
        return {node: value / total for node, value in d.items()}

    # Starting vector: uniform unless the caller supplied one.
    x = dict.fromkeys(W, 1.0 / N) if nstart is None else normalized(nstart)

    # Personalization vector: uniform unless the caller supplied one.
    p = dict.fromkeys(W, 1.0 / N) if personalization is None else normalized(personalization)

    # Dangling-node mass is redistributed by the personalization vector
    # unless an explicit dangling distribution was given.
    dangling_weights = p if dangling is None else normalized(dangling)
    dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]

    # Power iteration: run at most max_iter sweeps.
    for _ in range(max_iter):
        xlast = x
        x = dict.fromkeys(xlast.keys(), 0)
        # Total rank currently parked on dangling nodes, pre-scaled by alpha.
        danglesum = alpha * sum(xlast[node] for node in dangling_nodes)
        for node in x:
            # Left multiply x^T = xlast^T * W: push this node's rank
            # forward along each of its out-edges.
            for _, nbr, wt in W.edges(node, data=weight):
                x[nbr] += alpha * xlast[node] * wt
            x[node] += danglesum * dangling_weights.get(node, 0) + (1.0 - alpha) * p.get(node, 0)
        # Convergence test on the l1 norm of the change.
        if sum(abs(x[node] - xlast[node]) for node in x) < N * tol:
            return x
    raise nx.PowerIterationFailedConvergence(max_iter)
@nx._dispatch(edge_attrs="weight")
def google_matrix(
    G, alpha=0.85, personalization=None, nodelist=None, weight="weight", dangling=None
):
    """Returns the Google matrix of the graph.

    Parameters
    ----------
    G : graph
        A NetworkX graph. Undirected graphs will be converted to a directed
        graph with two directed edges for each undirected edge.

    alpha : float
        The damping factor.

    personalization : dict, optional
        The "personalization vector": a dict keyed by some subset of graph
        nodes with a personalization value for each. At least one value must
        be non-zero. Nodes not listed get a value of zero. By default, a
        uniform distribution is used.

    nodelist : list, optional
        The rows and columns are ordered according to the nodes in nodelist.
        If nodelist is None, then the ordering is produced by G.nodes().

    weight : key, optional
        Edge data key to use as weight. If None weights are set to 1.

    dangling : dict, optional
        The outedges to be assigned to any "dangling" nodes, i.e., nodes
        without any outedges. The dict key is the node the outedge points to
        and the dict value is the weight of that outedge. By default,
        dangling nodes are given outedges according to the personalization
        vector (uniform if not specified). This must be selected to result in
        an irreducible transition matrix (see notes below). It may be common
        to have the dangling dict be the same as the personalization dict.

    Returns
    -------
    A : 2D NumPy ndarray
        Google matrix of the graph

    Notes
    -----
    The returned array is the transition matrix of the Markov chain used in
    PageRank. For PageRank to converge to a unique stationary distribution
    the matrix must be irreducible — every pair of nodes must be connected by
    some path — otherwise "rank sinks" are possible.

    This implementation works with Multi(Di)Graphs. For multigraphs the
    weight between two nodes is set to be the sum of all edge weights
    between those nodes.

    See Also
    --------
    pagerank
    """
    import numpy as np

    if nodelist is None:
        nodelist = list(G)

    A = nx.to_numpy_array(G, nodelist=nodelist, weight=weight)
    N = len(G)
    if N == 0:
        # Empty graph: nothing to normalize, return the empty adjacency array.
        return A

    # Personalization vector in nodelist order; uniform when unspecified.
    if personalization is None:
        p = np.full(N, 1.0 / N)
    else:
        p = np.array([personalization.get(n, 0) for n in nodelist], dtype=float)
        if p.sum() == 0:
            # All-zero personalization cannot be normalized.
            raise ZeroDivisionError
        p /= p.sum()

    # Distribution used to replace all-zero rows (dangling nodes).
    if dangling is None:
        dangling_weights = p
    else:
        dangling_weights = np.array([dangling.get(n, 0) for n in nodelist], dtype=float)
        dangling_weights /= dangling_weights.sum()

    # Rows with no out-links receive the dangling distribution.
    dangling_rows = np.where(A.sum(axis=1) == 0)[0]
    A[dangling_rows] = dangling_weights

    # Make the matrix row-stochastic, then damp toward the personalization.
    A /= A.sum(axis=1)[:, np.newaxis]
    return alpha * A + (1 - alpha) * p
def _pagerank_numpy(
    G, alpha=0.85, personalization=None, weight="weight", dangling=None
):
    """Returns the PageRank of the nodes in the graph.

    Dense NumPy variant: builds the full Google matrix and extracts its
    dominant left eigenvector with LAPACK. Fastest and most accurate for
    small graphs.

    Parameters
    ----------
    G : graph
        A NetworkX graph. Undirected graphs will be converted to a directed
        graph with two directed edges for each undirected edge.

    alpha : float, optional
        Damping parameter for PageRank, default=0.85.

    personalization : dict, optional
        The "personalization vector": a dict keyed by some subset of graph
        nodes with a personalization value for each. At least one value must
        be non-zero. Nodes not listed get a value of zero. By default, a
        uniform distribution is used.

    weight : key, optional
        Edge data key to use as weight. If None weights are set to 1.

    dangling : dict, optional
        The outedges to be assigned to any "dangling" nodes, i.e., nodes
        without any outedges. The dict key is the node the outedge points to
        and the dict value is the weight of that outedge. By default,
        dangling nodes are given outedges according to the personalization
        vector (uniform if not specified). This must be selected to result in
        an irreducible transition matrix (see notes under google_matrix). It
        may be common to have the dangling dict be the same as the
        personalization dict.

    Returns
    -------
    pagerank : dictionary
        Dictionary of nodes with PageRank as value.

    Examples
    --------
    >>> from networkx.algorithms.link_analysis.pagerank_alg import _pagerank_numpy
    >>> G = nx.DiGraph(nx.path_graph(4))
    >>> pr = _pagerank_numpy(G, alpha=0.9)

    Notes
    -----
    This implementation works with Multi(Di)Graphs. For multigraphs the
    weight between two nodes is set to be the sum of all edge weights
    between those nodes.

    See Also
    --------
    pagerank, google_matrix

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
       The PageRank citation ranking: Bringing order to the Web. 1999
       http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
    """
    import numpy as np

    if len(G) == 0:
        return {}
    M = google_matrix(
        G, alpha, personalization=personalization, weight=weight, dangling=dangling
    )
    # PageRank is the dominant *left* eigenvector of M, i.e. the dominant
    # right eigenvector of M.T — solved here with NumPy's LAPACK interface.
    eigenvalues, eigenvectors = np.linalg.eig(M.T)
    dominant = np.argmax(eigenvalues)
    ranks = np.array(eigenvectors[:, dominant]).flatten().real
    # Normalize to a probability distribution before mapping back to nodes.
    return dict(zip(G, map(float, ranks / ranks.sum())))
def _pagerank_scipy(
    G,
    alpha=0.85,
    personalization=None,
    max_iter=100,
    tol=1.0e-6,
    nstart=None,
    weight="weight",
    dangling=None,
):
    """Returns the PageRank of the nodes in the graph.

    Sparse variant: power iteration over a SciPy CSR adjacency matrix.

    Parameters
    ----------
    G : graph
        A NetworkX graph. Undirected graphs will be converted to a directed
        graph with two directed edges for each undirected edge.

    alpha : float, optional
        Damping parameter for PageRank, default=0.85.

    personalization : dict, optional
        The "personalization vector": a dict keyed by some subset of graph
        nodes with a personalization value for each. At least one value must
        be non-zero. Nodes not listed get a value of zero. By default, a
        uniform distribution is used.

    max_iter : integer, optional
        Maximum number of iterations in power method eigenvalue solver.

    tol : float, optional
        Error tolerance used to check convergence in power method solver.
        The iteration will stop after a tolerance of ``len(G) * tol`` is reached.

    nstart : dictionary, optional
        Starting value of PageRank iteration for each node.

    weight : key, optional
        Edge data key to use as weight. If None weights are set to 1.

    dangling : dict, optional
        The outedges to be assigned to any "dangling" nodes, i.e., nodes
        without any outedges. The dict key is the node the outedge points to
        and the dict value is the weight of that outedge. By default,
        dangling nodes are given outedges according to the personalization
        vector (uniform if not specified). This must be selected to result in
        an irreducible transition matrix (see notes under google_matrix). It
        may be common to have the dangling dict be the same as the
        personalization dict.

    Returns
    -------
    pagerank : dictionary
        Dictionary of nodes with PageRank as value

    Examples
    --------
    >>> from networkx.algorithms.link_analysis.pagerank_alg import _pagerank_scipy
    >>> G = nx.DiGraph(nx.path_graph(4))
    >>> pr = _pagerank_scipy(G, alpha=0.9)

    Notes
    -----
    This implementation works with Multi(Di)Graphs. For multigraphs the
    weight between two nodes is set to be the sum of all edge weights
    between those nodes.

    See Also
    --------
    pagerank

    Raises
    ------
    PowerIterationFailedConvergence
        If the algorithm fails to converge to the specified tolerance
        within the specified number of iterations of the power iteration
        method.

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
       The PageRank citation ranking: Bringing order to the Web. 1999
       http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
    """
    import numpy as np
    import scipy as sp

    N = len(G)
    if N == 0:
        return {}

    node_order = list(G)
    A = nx.to_scipy_sparse_array(G, nodelist=node_order, weight=weight, dtype=float)

    # Invert the nonzero row sums; zero rows (dangling nodes) stay zero.
    rowsums = A.sum(axis=1)
    rowsums[rowsums != 0] = 1.0 / rowsums[rowsums != 0]
    # Left-multiply by diag(1/rowsum) to make A right-stochastic.
    # TODO: csr_array
    A = sp.sparse.csr_array(sp.sparse.spdiags(rowsums.T, 0, *A.shape)) @ A

    # Initial rank vector: uniform unless a (then normalized) nstart is given.
    if nstart is None:
        x = np.full(N, 1.0 / N)
    else:
        x = np.array([nstart.get(n, 0) for n in node_order], dtype=float)
        x /= x.sum()

    # Personalization vector in node order; uniform when unspecified.
    if personalization is None:
        p = np.full(N, 1.0 / N)
    else:
        p = np.array([personalization.get(n, 0) for n in node_order], dtype=float)
        if p.sum() == 0:
            # All-zero personalization cannot be normalized.
            raise ZeroDivisionError
        p /= p.sum()

    # Distribution that absorbs dangling-node mass each iteration.
    if dangling is None:
        dangling_weights = p
    else:
        dangling_weights = np.array([dangling.get(n, 0) for n in node_order], dtype=float)
        dangling_weights /= dangling_weights.sum()
    is_dangling = np.where(rowsums == 0)[0]

    # Power iteration: run at most max_iter sweeps.
    for _ in range(max_iter):
        xlast = x
        x = alpha * (x @ A + sum(x[is_dangling]) * dangling_weights) + (1 - alpha) * p
        # Convergence test on the l1 norm of the change.
        if np.absolute(x - xlast).sum() < N * tol:
            return dict(zip(node_order, map(float, x)))
    raise nx.PowerIterationFailedConvergence(max_iter)