1"""Hubs and authorities analysis of graph structure."""
2
3import networkx as nx
4
5__all__ = ["hits"]
6
7
8@nx._dispatchable(preserve_edge_attrs={"G": {"weight": 1}})
9def hits(G, max_iter=100, tol=1.0e-8, nstart=None, normalized=True):
10 """Returns HITS hubs and authorities values for nodes.
11
12 The HITS algorithm computes two numbers for a node.
13 Authorities estimates the node value based on the incoming links.
14 Hubs estimates the node value based on outgoing links.
15
16 Parameters
17 ----------
18 G : graph
19 A NetworkX graph
20
21 max_iter : integer, optional
22 Maximum number of iterations in power method.
23
24 tol : float, optional
25 Error tolerance used to check convergence in power method iteration.
26
27 nstart : dictionary, optional
28 Starting value of each node for power method iteration.
29
30 normalized : bool (default=True)
31 Normalize results by the sum of all of the values.
32
33 Returns
34 -------
35 (hubs,authorities) : two-tuple of dictionaries
36 Two dictionaries keyed by node containing the hub and authority
37 values.
38
39 Raises
40 ------
41 PowerIterationFailedConvergence
42 If the algorithm fails to converge to the specified tolerance
43 within the specified number of iterations of the power iteration
44 method.
45
46 Examples
47 --------
48 >>> G = nx.path_graph(4)
49 >>> h, a = nx.hits(G)
50
51 Notes
52 -----
53 The eigenvector calculation is done by the power iteration method
54 and has no guarantee of convergence. The iteration will stop
55 after max_iter iterations or an error tolerance of
56 number_of_nodes(G)*tol has been reached.
57
58 The HITS algorithm was designed for directed graphs but this
59 algorithm does not check if the input graph is directed and will
60 execute on undirected graphs.
61
62 References
63 ----------
64 .. [1] A. Langville and C. Meyer,
65 "A survey of eigenvector methods of web information retrieval."
66 https://epubs.siam.org/doi/epdf/10.1137/S0036144503424786
67 .. [2] Jon Kleinberg,
68 Authoritative sources in a hyperlinked environment
69 Journal of the ACM 46 (5): 604-32, 1999.
70 https://www.cs.cornell.edu/home/kleinber/auth.pdf
71 doi:10.1145/324133.324140.
72 """
73 import numpy as np
74 import scipy as sp
75
76 if len(G) == 0:
77 return {}, {}
78 A = nx.adjacency_matrix(G, nodelist=list(G), dtype=float)
79
80 if nstart is not None:
81 nstart = np.array(list(nstart.values()))
82 if max_iter <= 0:
83 raise nx.PowerIterationFailedConvergence(max_iter)
84 try:
85 _, _, vt = sp.sparse.linalg.svds(A, k=1, v0=nstart, maxiter=max_iter, tol=tol)
86 except sp.sparse.linalg.ArpackNoConvergence as exc:
87 raise nx.PowerIterationFailedConvergence(max_iter) from exc
88
89 a = vt.flatten().real
90 h = A @ a
91 if normalized:
92 h /= h.sum()
93 a /= a.sum()
94 hubs = dict(zip(G, map(float, h)))
95 authorities = dict(zip(G, map(float, a)))
96 return hubs, authorities
97
98
99def _hits_python(G, max_iter=100, tol=1.0e-8, nstart=None, normalized=True):
100 if isinstance(G, nx.MultiGraph | nx.MultiDiGraph):
101 raise Exception("hits() not defined for graphs with multiedges.")
102 if len(G) == 0:
103 return {}, {}
104 # choose fixed starting vector if not given
105 if nstart is None:
106 h = dict.fromkeys(G, 1.0 / G.number_of_nodes())
107 else:
108 h = nstart
109 # normalize starting vector
110 s = 1.0 / sum(h.values())
111 for k in h:
112 h[k] *= s
113 for _ in range(max_iter): # power iteration: make up to max_iter iterations
114 hlast = h
115 h = dict.fromkeys(hlast.keys(), 0)
116 a = dict.fromkeys(hlast.keys(), 0)
117 # this "matrix multiply" looks odd because it is
118 # doing a left multiply a^T=hlast^T*G
119 for n in h:
120 for nbr in G[n]:
121 a[nbr] += hlast[n] * G[n][nbr].get("weight", 1)
122 # now multiply h=Ga
123 for n in h:
124 for nbr in G[n]:
125 h[n] += a[nbr] * G[n][nbr].get("weight", 1)
126 # normalize vector
127 s = 1.0 / max(h.values())
128 for n in h:
129 h[n] *= s
130 # normalize vector
131 s = 1.0 / max(a.values())
132 for n in a:
133 a[n] *= s
134 # check convergence, l1 norm
135 err = sum(abs(h[n] - hlast[n]) for n in h)
136 if err < tol:
137 break
138 else:
139 raise nx.PowerIterationFailedConvergence(max_iter)
140 if normalized:
141 s = 1.0 / sum(a.values())
142 for n in a:
143 a[n] *= s
144 s = 1.0 / sum(h.values())
145 for n in h:
146 h[n] *= s
147 return h, a
148
149
150def _hits_numpy(G, normalized=True):
151 """Returns HITS hubs and authorities values for nodes.
152
153 The HITS algorithm computes two numbers for a node.
154 Authorities estimates the node value based on the incoming links.
155 Hubs estimates the node value based on outgoing links.
156
157 Parameters
158 ----------
159 G : graph
160 A NetworkX graph
161
162 normalized : bool (default=True)
163 Normalize results by the sum of all of the values.
164
165 Returns
166 -------
167 (hubs,authorities) : two-tuple of dictionaries
168 Two dictionaries keyed by node containing the hub and authority
169 values.
170
171 Examples
172 --------
173 >>> G = nx.path_graph(4)
174
175 The `hubs` and `authorities` are given by the eigenvectors corresponding to the
176 maximum eigenvalues of the hubs_matrix and the authority_matrix, respectively.
177
178 The ``hubs`` and ``authority`` matrices are computed from the adjacency
179 matrix:
180
181 >>> adj_ary = nx.to_numpy_array(G)
182 >>> hubs_matrix = adj_ary @ adj_ary.T
183 >>> authority_matrix = adj_ary.T @ adj_ary
184
185 `_hits_numpy` maps the eigenvector corresponding to the maximum eigenvalue
186 of the respective matrices to the nodes in `G`:
187
188 >>> from networkx.algorithms.link_analysis.hits_alg import _hits_numpy
189 >>> hubs, authority = _hits_numpy(G)
190
191 Notes
192 -----
193 The eigenvector calculation uses NumPy's interface to LAPACK.
194
195 The HITS algorithm was designed for directed graphs but this
196 algorithm does not check if the input graph is directed and will
197 execute on undirected graphs.
198
199 References
200 ----------
201 .. [1] A. Langville and C. Meyer,
202 "A survey of eigenvector methods of web information retrieval."
203 http://citeseer.ist.psu.edu/713792.html
204 .. [2] Jon Kleinberg,
205 Authoritative sources in a hyperlinked environment
206 Journal of the ACM 46 (5): 604-32, 1999.
207 doi:10.1145/324133.324140.
208 http://www.cs.cornell.edu/home/kleinber/auth.pdf.
209 """
210 import numpy as np
211
212 if len(G) == 0:
213 return {}, {}
214 adj_ary = nx.to_numpy_array(G)
215 # Hub matrix
216 H = adj_ary @ adj_ary.T
217 e, ev = np.linalg.eig(H)
218 h = ev[:, np.argmax(e)] # eigenvector corresponding to the maximum eigenvalue
219 # Authority matrix
220 A = adj_ary.T @ adj_ary
221 e, ev = np.linalg.eig(A)
222 a = ev[:, np.argmax(e)] # eigenvector corresponding to the maximum eigenvalue
223 if normalized:
224 h /= h.sum()
225 a /= a.sum()
226 else:
227 h /= h.max()
228 a /= a.max()
229 hubs = dict(zip(G, map(float, h)))
230 authorities = dict(zip(G, map(float, a)))
231 return hubs, authorities
232
233
234def _hits_scipy(G, max_iter=100, tol=1.0e-6, nstart=None, normalized=True):
235 """Returns HITS hubs and authorities values for nodes.
236
237
238 The HITS algorithm computes two numbers for a node.
239 Authorities estimates the node value based on the incoming links.
240 Hubs estimates the node value based on outgoing links.
241
242 Parameters
243 ----------
244 G : graph
245 A NetworkX graph
246
247 max_iter : integer, optional
248 Maximum number of iterations in power method.
249
250 tol : float, optional
251 Error tolerance used to check convergence in power method iteration.
252
253 nstart : dictionary, optional
254 Starting value of each node for power method iteration.
255
256 normalized : bool (default=True)
257 Normalize results by the sum of all of the values.
258
259 Returns
260 -------
261 (hubs,authorities) : two-tuple of dictionaries
262 Two dictionaries keyed by node containing the hub and authority
263 values.
264
265 Examples
266 --------
267 >>> from networkx.algorithms.link_analysis.hits_alg import _hits_scipy
268 >>> G = nx.path_graph(4)
269 >>> h, a = _hits_scipy(G)
270
271 Notes
272 -----
273 This implementation uses SciPy sparse matrices.
274
275 The eigenvector calculation is done by the power iteration method
276 and has no guarantee of convergence. The iteration will stop
277 after max_iter iterations or an error tolerance of
278 number_of_nodes(G)*tol has been reached.
279
280 The HITS algorithm was designed for directed graphs but this
281 algorithm does not check if the input graph is directed and will
282 execute on undirected graphs.
283
284 Raises
285 ------
286 PowerIterationFailedConvergence
287 If the algorithm fails to converge to the specified tolerance
288 within the specified number of iterations of the power iteration
289 method.
290
291 References
292 ----------
293 .. [1] A. Langville and C. Meyer,
294 "A survey of eigenvector methods of web information retrieval."
295 http://citeseer.ist.psu.edu/713792.html
296 .. [2] Jon Kleinberg,
297 Authoritative sources in a hyperlinked environment
298 Journal of the ACM 46 (5): 604-632, 1999.
299 doi:10.1145/324133.324140.
300 http://www.cs.cornell.edu/home/kleinber/auth.pdf.
301 """
302 import numpy as np
303
304 if len(G) == 0:
305 return {}, {}
306 A = nx.to_scipy_sparse_array(G, nodelist=list(G))
307 (n, _) = A.shape # should be square
308 ATA = A.T @ A # authority matrix
309 # choose fixed starting vector if not given
310 if nstart is None:
311 x = np.ones((n, 1)) / n
312 else:
313 x = np.array([nstart.get(n, 0) for n in list(G)], dtype=float)
314 x /= x.sum()
315
316 # power iteration on authority matrix
317 i = 0
318 while True:
319 xlast = x
320 x = ATA @ x
321 x /= x.max()
322 # check convergence, l1 norm
323 err = np.absolute(x - xlast).sum()
324 if err < tol:
325 break
326 if i > max_iter:
327 raise nx.PowerIterationFailedConvergence(max_iter)
328 i += 1
329
330 a = x.flatten()
331 h = A @ a
332 if normalized:
333 h /= h.sum()
334 a /= a.sum()
335 hubs = dict(zip(G, map(float, h)))
336 authorities = dict(zip(G, map(float, a)))
337 return hubs, authorities