Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/networkx/algorithms/link_analysis/hits_alg.py: 6%
108 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-20 07:00 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-20 07:00 +0000
1"""Hubs and authorities analysis of graph structure.
2"""
3import networkx as nx
5__all__ = ["hits"]
8@nx._dispatch
9def hits(G, max_iter=100, tol=1.0e-8, nstart=None, normalized=True):
10 """Returns HITS hubs and authorities values for nodes.
12 The HITS algorithm computes two numbers for a node.
13 Authorities estimates the node value based on the incoming links.
14 Hubs estimates the node value based on outgoing links.
16 Parameters
17 ----------
18 G : graph
19 A NetworkX graph
21 max_iter : integer, optional
22 Maximum number of iterations in power method.
24 tol : float, optional
25 Error tolerance used to check convergence in power method iteration.
27 nstart : dictionary, optional
28 Starting value of each node for power method iteration.
30 normalized : bool (default=True)
31 Normalize results by the sum of all of the values.
33 Returns
34 -------
35 (hubs,authorities) : two-tuple of dictionaries
36 Two dictionaries keyed by node containing the hub and authority
37 values.
39 Raises
40 ------
41 PowerIterationFailedConvergence
42 If the algorithm fails to converge to the specified tolerance
43 within the specified number of iterations of the power iteration
44 method.
46 Examples
47 --------
48 >>> G = nx.path_graph(4)
49 >>> h, a = nx.hits(G)
51 Notes
52 -----
53 The eigenvector calculation is done by the power iteration method
54 and has no guarantee of convergence. The iteration will stop
55 after max_iter iterations or an error tolerance of
56 number_of_nodes(G)*tol has been reached.
58 The HITS algorithm was designed for directed graphs but this
59 algorithm does not check if the input graph is directed and will
60 execute on undirected graphs.
62 References
63 ----------
64 .. [1] A. Langville and C. Meyer,
65 "A survey of eigenvector methods of web information retrieval."
66 http://citeseer.ist.psu.edu/713792.html
67 .. [2] Jon Kleinberg,
68 Authoritative sources in a hyperlinked environment
69 Journal of the ACM 46 (5): 604-32, 1999.
70 doi:10.1145/324133.324140.
71 http://www.cs.cornell.edu/home/kleinber/auth.pdf.
72 """
73 import numpy as np
74 import scipy as sp
76 if len(G) == 0:
77 return {}, {}
78 A = nx.adjacency_matrix(G, nodelist=list(G), dtype=float)
80 if nstart is None:
81 _, _, vt = sp.sparse.linalg.svds(A, k=1, maxiter=max_iter, tol=tol)
82 else:
83 nstart = np.array(list(nstart.values()))
84 _, _, vt = sp.sparse.linalg.svds(A, k=1, v0=nstart, maxiter=max_iter, tol=tol)
86 a = vt.flatten().real
87 h = A @ a
88 if normalized:
89 h /= h.sum()
90 a /= a.sum()
91 hubs = dict(zip(G, map(float, h)))
92 authorities = dict(zip(G, map(float, a)))
93 return hubs, authorities
96def _hits_python(G, max_iter=100, tol=1.0e-8, nstart=None, normalized=True):
97 if isinstance(G, (nx.MultiGraph, nx.MultiDiGraph)):
98 raise Exception("hits() not defined for graphs with multiedges.")
99 if len(G) == 0:
100 return {}, {}
101 # choose fixed starting vector if not given
102 if nstart is None:
103 h = dict.fromkeys(G, 1.0 / G.number_of_nodes())
104 else:
105 h = nstart
106 # normalize starting vector
107 s = 1.0 / sum(h.values())
108 for k in h:
109 h[k] *= s
110 for _ in range(max_iter): # power iteration: make up to max_iter iterations
111 hlast = h
112 h = dict.fromkeys(hlast.keys(), 0)
113 a = dict.fromkeys(hlast.keys(), 0)
114 # this "matrix multiply" looks odd because it is
115 # doing a left multiply a^T=hlast^T*G
116 for n in h:
117 for nbr in G[n]:
118 a[nbr] += hlast[n] * G[n][nbr].get("weight", 1)
119 # now multiply h=Ga
120 for n in h:
121 for nbr in G[n]:
122 h[n] += a[nbr] * G[n][nbr].get("weight", 1)
123 # normalize vector
124 s = 1.0 / max(h.values())
125 for n in h:
126 h[n] *= s
127 # normalize vector
128 s = 1.0 / max(a.values())
129 for n in a:
130 a[n] *= s
131 # check convergence, l1 norm
132 err = sum(abs(h[n] - hlast[n]) for n in h)
133 if err < tol:
134 break
135 else:
136 raise nx.PowerIterationFailedConvergence(max_iter)
137 if normalized:
138 s = 1.0 / sum(a.values())
139 for n in a:
140 a[n] *= s
141 s = 1.0 / sum(h.values())
142 for n in h:
143 h[n] *= s
144 return h, a
147def _hits_numpy(G, normalized=True):
148 """Returns HITS hubs and authorities values for nodes.
150 The HITS algorithm computes two numbers for a node.
151 Authorities estimates the node value based on the incoming links.
152 Hubs estimates the node value based on outgoing links.
154 Parameters
155 ----------
156 G : graph
157 A NetworkX graph
159 normalized : bool (default=True)
160 Normalize results by the sum of all of the values.
162 Returns
163 -------
164 (hubs,authorities) : two-tuple of dictionaries
165 Two dictionaries keyed by node containing the hub and authority
166 values.
168 Examples
169 --------
170 >>> G = nx.path_graph(4)
172 The `hubs` and `authorities` are given by the eigenvectors corresponding to the
173 maximum eigenvalues of the hubs_matrix and the authority_matrix, respectively.
175 The ``hubs`` and ``authority`` matrices are computed from the adjacency
176 matrix:
178 >>> adj_ary = nx.to_numpy_array(G)
179 >>> hubs_matrix = adj_ary @ adj_ary.T
180 >>> authority_matrix = adj_ary.T @ adj_ary
182 `_hits_numpy` maps the eigenvector corresponding to the maximum eigenvalue
183 of the respective matrices to the nodes in `G`:
185 >>> from networkx.algorithms.link_analysis.hits_alg import _hits_numpy
186 >>> hubs, authority = _hits_numpy(G)
188 Notes
189 -----
190 The eigenvector calculation uses NumPy's interface to LAPACK.
192 The HITS algorithm was designed for directed graphs but this
193 algorithm does not check if the input graph is directed and will
194 execute on undirected graphs.
196 References
197 ----------
198 .. [1] A. Langville and C. Meyer,
199 "A survey of eigenvector methods of web information retrieval."
200 http://citeseer.ist.psu.edu/713792.html
201 .. [2] Jon Kleinberg,
202 Authoritative sources in a hyperlinked environment
203 Journal of the ACM 46 (5): 604-32, 1999.
204 doi:10.1145/324133.324140.
205 http://www.cs.cornell.edu/home/kleinber/auth.pdf.
206 """
207 import numpy as np
209 if len(G) == 0:
210 return {}, {}
211 adj_ary = nx.to_numpy_array(G)
212 # Hub matrix
213 H = adj_ary @ adj_ary.T
214 e, ev = np.linalg.eig(H)
215 h = ev[:, np.argmax(e)] # eigenvector corresponding to the maximum eigenvalue
216 # Authority matrix
217 A = adj_ary.T @ adj_ary
218 e, ev = np.linalg.eig(A)
219 a = ev[:, np.argmax(e)] # eigenvector corresponding to the maximum eigenvalue
220 if normalized:
221 h /= h.sum()
222 a /= a.sum()
223 else:
224 h /= h.max()
225 a /= a.max()
226 hubs = dict(zip(G, map(float, h)))
227 authorities = dict(zip(G, map(float, a)))
228 return hubs, authorities
231def _hits_scipy(G, max_iter=100, tol=1.0e-6, nstart=None, normalized=True):
232 """Returns HITS hubs and authorities values for nodes.
235 The HITS algorithm computes two numbers for a node.
236 Authorities estimates the node value based on the incoming links.
237 Hubs estimates the node value based on outgoing links.
239 Parameters
240 ----------
241 G : graph
242 A NetworkX graph
244 max_iter : integer, optional
245 Maximum number of iterations in power method.
247 tol : float, optional
248 Error tolerance used to check convergence in power method iteration.
250 nstart : dictionary, optional
251 Starting value of each node for power method iteration.
253 normalized : bool (default=True)
254 Normalize results by the sum of all of the values.
256 Returns
257 -------
258 (hubs,authorities) : two-tuple of dictionaries
259 Two dictionaries keyed by node containing the hub and authority
260 values.
262 Examples
263 --------
264 >>> from networkx.algorithms.link_analysis.hits_alg import _hits_scipy
265 >>> G = nx.path_graph(4)
266 >>> h, a = _hits_scipy(G)
268 Notes
269 -----
270 This implementation uses SciPy sparse matrices.
272 The eigenvector calculation is done by the power iteration method
273 and has no guarantee of convergence. The iteration will stop
274 after max_iter iterations or an error tolerance of
275 number_of_nodes(G)*tol has been reached.
277 The HITS algorithm was designed for directed graphs but this
278 algorithm does not check if the input graph is directed and will
279 execute on undirected graphs.
281 Raises
282 ------
283 PowerIterationFailedConvergence
284 If the algorithm fails to converge to the specified tolerance
285 within the specified number of iterations of the power iteration
286 method.
288 References
289 ----------
290 .. [1] A. Langville and C. Meyer,
291 "A survey of eigenvector methods of web information retrieval."
292 http://citeseer.ist.psu.edu/713792.html
293 .. [2] Jon Kleinberg,
294 Authoritative sources in a hyperlinked environment
295 Journal of the ACM 46 (5): 604-632, 1999.
296 doi:10.1145/324133.324140.
297 http://www.cs.cornell.edu/home/kleinber/auth.pdf.
298 """
299 import numpy as np
301 if len(G) == 0:
302 return {}, {}
303 A = nx.to_scipy_sparse_array(G, nodelist=list(G))
304 (n, _) = A.shape # should be square
305 ATA = A.T @ A # authority matrix
306 # choose fixed starting vector if not given
307 if nstart is None:
308 x = np.ones((n, 1)) / n
309 else:
310 x = np.array([nstart.get(n, 0) for n in list(G)], dtype=float)
311 x /= x.sum()
313 # power iteration on authority matrix
314 i = 0
315 while True:
316 xlast = x
317 x = ATA @ x
318 x /= x.max()
319 # check convergence, l1 norm
320 err = np.absolute(x - xlast).sum()
321 if err < tol:
322 break
323 if i > max_iter:
324 raise nx.PowerIterationFailedConvergence(max_iter)
325 i += 1
327 a = x.flatten()
328 h = A @ a
329 if normalized:
330 h /= h.sum()
331 a /= a.sum()
332 hubs = dict(zip(G, map(float, h)))
333 authorities = dict(zip(G, map(float, a)))
334 return hubs, authorities