Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/networkx/algorithms/link_analysis/hits_alg.py: 6%

108 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-20 07:00 +0000

1"""Hubs and authorities analysis of graph structure. 

2""" 

3import networkx as nx 

4 

5__all__ = ["hits"] 

6 

7 

8@nx._dispatch 

9def hits(G, max_iter=100, tol=1.0e-8, nstart=None, normalized=True): 

10 """Returns HITS hubs and authorities values for nodes. 

11 

12 The HITS algorithm computes two numbers for a node. 

13 Authorities estimates the node value based on the incoming links. 

14 Hubs estimates the node value based on outgoing links. 

15 

16 Parameters 

17 ---------- 

18 G : graph 

19 A NetworkX graph 

20 

21 max_iter : integer, optional 

22 Maximum number of iterations in power method. 

23 

24 tol : float, optional 

25 Error tolerance used to check convergence in power method iteration. 

26 

27 nstart : dictionary, optional 

28 Starting value of each node for power method iteration. 

29 

30 normalized : bool (default=True) 

31 Normalize results by the sum of all of the values. 

32 

33 Returns 

34 ------- 

35 (hubs,authorities) : two-tuple of dictionaries 

36 Two dictionaries keyed by node containing the hub and authority 

37 values. 

38 

39 Raises 

40 ------ 

41 PowerIterationFailedConvergence 

42 If the algorithm fails to converge to the specified tolerance 

43 within the specified number of iterations of the power iteration 

44 method. 

45 

46 Examples 

47 -------- 

48 >>> G = nx.path_graph(4) 

49 >>> h, a = nx.hits(G) 

50 

51 Notes 

52 ----- 

53 The eigenvector calculation is done by the power iteration method 

54 and has no guarantee of convergence. The iteration will stop 

55 after max_iter iterations or an error tolerance of 

56 number_of_nodes(G)*tol has been reached. 

57 

58 The HITS algorithm was designed for directed graphs but this 

59 algorithm does not check if the input graph is directed and will 

60 execute on undirected graphs. 

61 

62 References 

63 ---------- 

64 .. [1] A. Langville and C. Meyer, 

65 "A survey of eigenvector methods of web information retrieval." 

66 http://citeseer.ist.psu.edu/713792.html 

67 .. [2] Jon Kleinberg, 

68 Authoritative sources in a hyperlinked environment 

69 Journal of the ACM 46 (5): 604-32, 1999. 

70 doi:10.1145/324133.324140. 

71 http://www.cs.cornell.edu/home/kleinber/auth.pdf. 

72 """ 

73 import numpy as np 

74 import scipy as sp 

75 

76 if len(G) == 0: 

77 return {}, {} 

78 A = nx.adjacency_matrix(G, nodelist=list(G), dtype=float) 

79 

80 if nstart is None: 

81 _, _, vt = sp.sparse.linalg.svds(A, k=1, maxiter=max_iter, tol=tol) 

82 else: 

83 nstart = np.array(list(nstart.values())) 

84 _, _, vt = sp.sparse.linalg.svds(A, k=1, v0=nstart, maxiter=max_iter, tol=tol) 

85 

86 a = vt.flatten().real 

87 h = A @ a 

88 if normalized: 

89 h /= h.sum() 

90 a /= a.sum() 

91 hubs = dict(zip(G, map(float, h))) 

92 authorities = dict(zip(G, map(float, a))) 

93 return hubs, authorities 

94 

95 

96def _hits_python(G, max_iter=100, tol=1.0e-8, nstart=None, normalized=True): 

97 if isinstance(G, (nx.MultiGraph, nx.MultiDiGraph)): 

98 raise Exception("hits() not defined for graphs with multiedges.") 

99 if len(G) == 0: 

100 return {}, {} 

101 # choose fixed starting vector if not given 

102 if nstart is None: 

103 h = dict.fromkeys(G, 1.0 / G.number_of_nodes()) 

104 else: 

105 h = nstart 

106 # normalize starting vector 

107 s = 1.0 / sum(h.values()) 

108 for k in h: 

109 h[k] *= s 

110 for _ in range(max_iter): # power iteration: make up to max_iter iterations 

111 hlast = h 

112 h = dict.fromkeys(hlast.keys(), 0) 

113 a = dict.fromkeys(hlast.keys(), 0) 

114 # this "matrix multiply" looks odd because it is 

115 # doing a left multiply a^T=hlast^T*G 

116 for n in h: 

117 for nbr in G[n]: 

118 a[nbr] += hlast[n] * G[n][nbr].get("weight", 1) 

119 # now multiply h=Ga 

120 for n in h: 

121 for nbr in G[n]: 

122 h[n] += a[nbr] * G[n][nbr].get("weight", 1) 

123 # normalize vector 

124 s = 1.0 / max(h.values()) 

125 for n in h: 

126 h[n] *= s 

127 # normalize vector 

128 s = 1.0 / max(a.values()) 

129 for n in a: 

130 a[n] *= s 

131 # check convergence, l1 norm 

132 err = sum(abs(h[n] - hlast[n]) for n in h) 

133 if err < tol: 

134 break 

135 else: 

136 raise nx.PowerIterationFailedConvergence(max_iter) 

137 if normalized: 

138 s = 1.0 / sum(a.values()) 

139 for n in a: 

140 a[n] *= s 

141 s = 1.0 / sum(h.values()) 

142 for n in h: 

143 h[n] *= s 

144 return h, a 

145 

146 

147def _hits_numpy(G, normalized=True): 

148 """Returns HITS hubs and authorities values for nodes. 

149 

150 The HITS algorithm computes two numbers for a node. 

151 Authorities estimates the node value based on the incoming links. 

152 Hubs estimates the node value based on outgoing links. 

153 

154 Parameters 

155 ---------- 

156 G : graph 

157 A NetworkX graph 

158 

159 normalized : bool (default=True) 

160 Normalize results by the sum of all of the values. 

161 

162 Returns 

163 ------- 

164 (hubs,authorities) : two-tuple of dictionaries 

165 Two dictionaries keyed by node containing the hub and authority 

166 values. 

167 

168 Examples 

169 -------- 

170 >>> G = nx.path_graph(4) 

171 

172 The `hubs` and `authorities` are given by the eigenvectors corresponding to the 

173 maximum eigenvalues of the hubs_matrix and the authority_matrix, respectively. 

174 

175 The ``hubs`` and ``authority`` matrices are computed from the adjacency 

176 matrix: 

177 

178 >>> adj_ary = nx.to_numpy_array(G) 

179 >>> hubs_matrix = adj_ary @ adj_ary.T 

180 >>> authority_matrix = adj_ary.T @ adj_ary 

181 

182 `_hits_numpy` maps the eigenvector corresponding to the maximum eigenvalue 

183 of the respective matrices to the nodes in `G`: 

184 

185 >>> from networkx.algorithms.link_analysis.hits_alg import _hits_numpy 

186 >>> hubs, authority = _hits_numpy(G) 

187 

188 Notes 

189 ----- 

190 The eigenvector calculation uses NumPy's interface to LAPACK. 

191 

192 The HITS algorithm was designed for directed graphs but this 

193 algorithm does not check if the input graph is directed and will 

194 execute on undirected graphs. 

195 

196 References 

197 ---------- 

198 .. [1] A. Langville and C. Meyer, 

199 "A survey of eigenvector methods of web information retrieval." 

200 http://citeseer.ist.psu.edu/713792.html 

201 .. [2] Jon Kleinberg, 

202 Authoritative sources in a hyperlinked environment 

203 Journal of the ACM 46 (5): 604-32, 1999. 

204 doi:10.1145/324133.324140. 

205 http://www.cs.cornell.edu/home/kleinber/auth.pdf. 

206 """ 

207 import numpy as np 

208 

209 if len(G) == 0: 

210 return {}, {} 

211 adj_ary = nx.to_numpy_array(G) 

212 # Hub matrix 

213 H = adj_ary @ adj_ary.T 

214 e, ev = np.linalg.eig(H) 

215 h = ev[:, np.argmax(e)] # eigenvector corresponding to the maximum eigenvalue 

216 # Authority matrix 

217 A = adj_ary.T @ adj_ary 

218 e, ev = np.linalg.eig(A) 

219 a = ev[:, np.argmax(e)] # eigenvector corresponding to the maximum eigenvalue 

220 if normalized: 

221 h /= h.sum() 

222 a /= a.sum() 

223 else: 

224 h /= h.max() 

225 a /= a.max() 

226 hubs = dict(zip(G, map(float, h))) 

227 authorities = dict(zip(G, map(float, a))) 

228 return hubs, authorities 

229 

230 

231def _hits_scipy(G, max_iter=100, tol=1.0e-6, nstart=None, normalized=True): 

232 """Returns HITS hubs and authorities values for nodes. 

233 

234 

235 The HITS algorithm computes two numbers for a node. 

236 Authorities estimates the node value based on the incoming links. 

237 Hubs estimates the node value based on outgoing links. 

238 

239 Parameters 

240 ---------- 

241 G : graph 

242 A NetworkX graph 

243 

244 max_iter : integer, optional 

245 Maximum number of iterations in power method. 

246 

247 tol : float, optional 

248 Error tolerance used to check convergence in power method iteration. 

249 

250 nstart : dictionary, optional 

251 Starting value of each node for power method iteration. 

252 

253 normalized : bool (default=True) 

254 Normalize results by the sum of all of the values. 

255 

256 Returns 

257 ------- 

258 (hubs,authorities) : two-tuple of dictionaries 

259 Two dictionaries keyed by node containing the hub and authority 

260 values. 

261 

262 Examples 

263 -------- 

264 >>> from networkx.algorithms.link_analysis.hits_alg import _hits_scipy 

265 >>> G = nx.path_graph(4) 

266 >>> h, a = _hits_scipy(G) 

267 

268 Notes 

269 ----- 

270 This implementation uses SciPy sparse matrices. 

271 

272 The eigenvector calculation is done by the power iteration method 

273 and has no guarantee of convergence. The iteration will stop 

274 after max_iter iterations or an error tolerance of 

275 number_of_nodes(G)*tol has been reached. 

276 

277 The HITS algorithm was designed for directed graphs but this 

278 algorithm does not check if the input graph is directed and will 

279 execute on undirected graphs. 

280 

281 Raises 

282 ------ 

283 PowerIterationFailedConvergence 

284 If the algorithm fails to converge to the specified tolerance 

285 within the specified number of iterations of the power iteration 

286 method. 

287 

288 References 

289 ---------- 

290 .. [1] A. Langville and C. Meyer, 

291 "A survey of eigenvector methods of web information retrieval." 

292 http://citeseer.ist.psu.edu/713792.html 

293 .. [2] Jon Kleinberg, 

294 Authoritative sources in a hyperlinked environment 

295 Journal of the ACM 46 (5): 604-632, 1999. 

296 doi:10.1145/324133.324140. 

297 http://www.cs.cornell.edu/home/kleinber/auth.pdf. 

298 """ 

299 import numpy as np 

300 

301 if len(G) == 0: 

302 return {}, {} 

303 A = nx.to_scipy_sparse_array(G, nodelist=list(G)) 

304 (n, _) = A.shape # should be square 

305 ATA = A.T @ A # authority matrix 

306 # choose fixed starting vector if not given 

307 if nstart is None: 

308 x = np.ones((n, 1)) / n 

309 else: 

310 x = np.array([nstart.get(n, 0) for n in list(G)], dtype=float) 

311 x /= x.sum() 

312 

313 # power iteration on authority matrix 

314 i = 0 

315 while True: 

316 xlast = x 

317 x = ATA @ x 

318 x /= x.max() 

319 # check convergence, l1 norm 

320 err = np.absolute(x - xlast).sum() 

321 if err < tol: 

322 break 

323 if i > max_iter: 

324 raise nx.PowerIterationFailedConvergence(max_iter) 

325 i += 1 

326 

327 a = x.flatten() 

328 h = A @ a 

329 if normalized: 

330 h /= h.sum() 

331 a /= a.sum() 

332 hubs = dict(zip(G, map(float, h))) 

333 authorities = dict(zip(G, map(float, a))) 

334 return hubs, authorities