Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/networkx/algorithms/link_analysis/hits_alg.py: 7%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

114 statements  

1"""Hubs and authorities analysis of graph structure.""" 

2 

3import networkx as nx 

4 

5__all__ = ["hits"] 

6 

7 

8@nx._dispatchable(preserve_edge_attrs={"G": {"weight": 1}}) 

9def hits(G, max_iter=100, tol=1.0e-8, nstart=None, normalized=True): 

10 """Returns HITS hubs and authorities values for nodes. 

11 

12 The HITS algorithm computes two numbers for a node. 

13 Authorities estimates the node value based on the incoming links. 

14 Hubs estimates the node value based on outgoing links. 

15 

16 Parameters 

17 ---------- 

18 G : graph 

19 A NetworkX graph 

20 

21 max_iter : integer, optional 

22 Maximum number of iterations in power method. 

23 

24 tol : float, optional 

25 Error tolerance used to check convergence in power method iteration. 

26 

27 nstart : dictionary, optional 

28 Starting value of each node for power method iteration. 

29 

30 normalized : bool (default=True) 

31 Normalize results by the sum of all of the values. 

32 

33 Returns 

34 ------- 

35 (hubs,authorities) : two-tuple of dictionaries 

36 Two dictionaries keyed by node containing the hub and authority 

37 values. 

38 

39 Raises 

40 ------ 

41 PowerIterationFailedConvergence 

42 If the algorithm fails to converge to the specified tolerance 

43 within the specified number of iterations of the power iteration 

44 method. 

45 

46 Examples 

47 -------- 

48 >>> G = nx.path_graph(4) 

49 >>> h, a = nx.hits(G) 

50 

51 Notes 

52 ----- 

53 The eigenvector calculation is done by the power iteration method 

54 and has no guarantee of convergence. The iteration will stop 

55 after max_iter iterations or an error tolerance of 

56 number_of_nodes(G)*tol has been reached. 

57 

58 The HITS algorithm was designed for directed graphs but this 

59 algorithm does not check if the input graph is directed and will 

60 execute on undirected graphs. 

61 

62 References 

63 ---------- 

64 .. [1] A. Langville and C. Meyer, 

65 "A survey of eigenvector methods of web information retrieval." 

66 https://epubs.siam.org/doi/epdf/10.1137/S0036144503424786 

67 .. [2] Jon Kleinberg, 

68 Authoritative sources in a hyperlinked environment 

69 Journal of the ACM 46 (5): 604-32, 1999. 

70 https://www.cs.cornell.edu/home/kleinber/auth.pdf 

71 doi:10.1145/324133.324140. 

72 """ 

73 import numpy as np 

74 import scipy as sp 

75 

76 if len(G) == 0: 

77 return {}, {} 

78 A = nx.adjacency_matrix(G, nodelist=list(G), dtype=float) 

79 

80 if nstart is not None: 

81 nstart = np.array(list(nstart.values())) 

82 if max_iter <= 0: 

83 raise nx.PowerIterationFailedConvergence(max_iter) 

84 try: 

85 _, _, vt = sp.sparse.linalg.svds(A, k=1, v0=nstart, maxiter=max_iter, tol=tol) 

86 except sp.sparse.linalg.ArpackNoConvergence as exc: 

87 raise nx.PowerIterationFailedConvergence(max_iter) from exc 

88 

89 a = vt.flatten().real 

90 h = A @ a 

91 if normalized: 

92 h /= h.sum() 

93 a /= a.sum() 

94 hubs = dict(zip(G, map(float, h))) 

95 authorities = dict(zip(G, map(float, a))) 

96 return hubs, authorities 

97 

98 

99def _hits_python(G, max_iter=100, tol=1.0e-8, nstart=None, normalized=True): 

100 if isinstance(G, nx.MultiGraph | nx.MultiDiGraph): 

101 raise Exception("hits() not defined for graphs with multiedges.") 

102 if len(G) == 0: 

103 return {}, {} 

104 # choose fixed starting vector if not given 

105 if nstart is None: 

106 h = dict.fromkeys(G, 1.0 / G.number_of_nodes()) 

107 else: 

108 h = nstart 

109 # normalize starting vector 

110 s = 1.0 / sum(h.values()) 

111 for k in h: 

112 h[k] *= s 

113 for _ in range(max_iter): # power iteration: make up to max_iter iterations 

114 hlast = h 

115 h = dict.fromkeys(hlast.keys(), 0) 

116 a = dict.fromkeys(hlast.keys(), 0) 

117 # this "matrix multiply" looks odd because it is 

118 # doing a left multiply a^T=hlast^T*G 

119 for n in h: 

120 for nbr in G[n]: 

121 a[nbr] += hlast[n] * G[n][nbr].get("weight", 1) 

122 # now multiply h=Ga 

123 for n in h: 

124 for nbr in G[n]: 

125 h[n] += a[nbr] * G[n][nbr].get("weight", 1) 

126 # normalize vector 

127 s = 1.0 / max(h.values()) 

128 for n in h: 

129 h[n] *= s 

130 # normalize vector 

131 s = 1.0 / max(a.values()) 

132 for n in a: 

133 a[n] *= s 

134 # check convergence, l1 norm 

135 err = sum(abs(h[n] - hlast[n]) for n in h) 

136 if err < tol: 

137 break 

138 else: 

139 raise nx.PowerIterationFailedConvergence(max_iter) 

140 if normalized: 

141 s = 1.0 / sum(a.values()) 

142 for n in a: 

143 a[n] *= s 

144 s = 1.0 / sum(h.values()) 

145 for n in h: 

146 h[n] *= s 

147 return h, a 

148 

149 

150def _hits_numpy(G, normalized=True): 

151 """Returns HITS hubs and authorities values for nodes. 

152 

153 The HITS algorithm computes two numbers for a node. 

154 Authorities estimates the node value based on the incoming links. 

155 Hubs estimates the node value based on outgoing links. 

156 

157 Parameters 

158 ---------- 

159 G : graph 

160 A NetworkX graph 

161 

162 normalized : bool (default=True) 

163 Normalize results by the sum of all of the values. 

164 

165 Returns 

166 ------- 

167 (hubs,authorities) : two-tuple of dictionaries 

168 Two dictionaries keyed by node containing the hub and authority 

169 values. 

170 

171 Examples 

172 -------- 

173 >>> G = nx.path_graph(4) 

174 

175 The `hubs` and `authorities` are given by the eigenvectors corresponding to the 

176 maximum eigenvalues of the hubs_matrix and the authority_matrix, respectively. 

177 

178 The ``hubs`` and ``authority`` matrices are computed from the adjacency 

179 matrix: 

180 

181 >>> adj_ary = nx.to_numpy_array(G) 

182 >>> hubs_matrix = adj_ary @ adj_ary.T 

183 >>> authority_matrix = adj_ary.T @ adj_ary 

184 

185 `_hits_numpy` maps the eigenvector corresponding to the maximum eigenvalue 

186 of the respective matrices to the nodes in `G`: 

187 

188 >>> from networkx.algorithms.link_analysis.hits_alg import _hits_numpy 

189 >>> hubs, authority = _hits_numpy(G) 

190 

191 Notes 

192 ----- 

193 The eigenvector calculation uses NumPy's interface to LAPACK. 

194 

195 The HITS algorithm was designed for directed graphs but this 

196 algorithm does not check if the input graph is directed and will 

197 execute on undirected graphs. 

198 

199 References 

200 ---------- 

201 .. [1] A. Langville and C. Meyer, 

202 "A survey of eigenvector methods of web information retrieval." 

203 http://citeseer.ist.psu.edu/713792.html 

204 .. [2] Jon Kleinberg, 

205 Authoritative sources in a hyperlinked environment 

206 Journal of the ACM 46 (5): 604-32, 1999. 

207 doi:10.1145/324133.324140. 

208 http://www.cs.cornell.edu/home/kleinber/auth.pdf. 

209 """ 

210 import numpy as np 

211 

212 if len(G) == 0: 

213 return {}, {} 

214 adj_ary = nx.to_numpy_array(G) 

215 # Hub matrix 

216 H = adj_ary @ adj_ary.T 

217 e, ev = np.linalg.eig(H) 

218 h = ev[:, np.argmax(e)] # eigenvector corresponding to the maximum eigenvalue 

219 # Authority matrix 

220 A = adj_ary.T @ adj_ary 

221 e, ev = np.linalg.eig(A) 

222 a = ev[:, np.argmax(e)] # eigenvector corresponding to the maximum eigenvalue 

223 if normalized: 

224 h /= h.sum() 

225 a /= a.sum() 

226 else: 

227 h /= h.max() 

228 a /= a.max() 

229 hubs = dict(zip(G, map(float, h))) 

230 authorities = dict(zip(G, map(float, a))) 

231 return hubs, authorities 

232 

233 

234def _hits_scipy(G, max_iter=100, tol=1.0e-6, nstart=None, normalized=True): 

235 """Returns HITS hubs and authorities values for nodes. 

236 

237 

238 The HITS algorithm computes two numbers for a node. 

239 Authorities estimates the node value based on the incoming links. 

240 Hubs estimates the node value based on outgoing links. 

241 

242 Parameters 

243 ---------- 

244 G : graph 

245 A NetworkX graph 

246 

247 max_iter : integer, optional 

248 Maximum number of iterations in power method. 

249 

250 tol : float, optional 

251 Error tolerance used to check convergence in power method iteration. 

252 

253 nstart : dictionary, optional 

254 Starting value of each node for power method iteration. 

255 

256 normalized : bool (default=True) 

257 Normalize results by the sum of all of the values. 

258 

259 Returns 

260 ------- 

261 (hubs,authorities) : two-tuple of dictionaries 

262 Two dictionaries keyed by node containing the hub and authority 

263 values. 

264 

265 Examples 

266 -------- 

267 >>> from networkx.algorithms.link_analysis.hits_alg import _hits_scipy 

268 >>> G = nx.path_graph(4) 

269 >>> h, a = _hits_scipy(G) 

270 

271 Notes 

272 ----- 

273 This implementation uses SciPy sparse matrices. 

274 

275 The eigenvector calculation is done by the power iteration method 

276 and has no guarantee of convergence. The iteration will stop 

277 after max_iter iterations or an error tolerance of 

278 number_of_nodes(G)*tol has been reached. 

279 

280 The HITS algorithm was designed for directed graphs but this 

281 algorithm does not check if the input graph is directed and will 

282 execute on undirected graphs. 

283 

284 Raises 

285 ------ 

286 PowerIterationFailedConvergence 

287 If the algorithm fails to converge to the specified tolerance 

288 within the specified number of iterations of the power iteration 

289 method. 

290 

291 References 

292 ---------- 

293 .. [1] A. Langville and C. Meyer, 

294 "A survey of eigenvector methods of web information retrieval." 

295 http://citeseer.ist.psu.edu/713792.html 

296 .. [2] Jon Kleinberg, 

297 Authoritative sources in a hyperlinked environment 

298 Journal of the ACM 46 (5): 604-632, 1999. 

299 doi:10.1145/324133.324140. 

300 http://www.cs.cornell.edu/home/kleinber/auth.pdf. 

301 """ 

302 import numpy as np 

303 

304 if len(G) == 0: 

305 return {}, {} 

306 A = nx.to_scipy_sparse_array(G, nodelist=list(G)) 

307 (n, _) = A.shape # should be square 

308 ATA = A.T @ A # authority matrix 

309 # choose fixed starting vector if not given 

310 if nstart is None: 

311 x = np.ones((n, 1)) / n 

312 else: 

313 x = np.array([nstart.get(n, 0) for n in list(G)], dtype=float) 

314 x /= x.sum() 

315 

316 # power iteration on authority matrix 

317 i = 0 

318 while True: 

319 xlast = x 

320 x = ATA @ x 

321 x /= x.max() 

322 # check convergence, l1 norm 

323 err = np.absolute(x - xlast).sum() 

324 if err < tol: 

325 break 

326 if i > max_iter: 

327 raise nx.PowerIterationFailedConvergence(max_iter) 

328 i += 1 

329 

330 a = x.flatten() 

331 h = A @ a 

332 if normalized: 

333 h /= h.sum() 

334 a /= a.sum() 

335 hubs = dict(zip(G, map(float, h))) 

336 authorities = dict(zip(G, map(float, a))) 

337 return hubs, authorities