Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/networkx/algorithms/link_analysis/hits

1"""Hubs and authorities analysis of graph structure.

2"""

3import networkx as nx

5__all__ = ["hits"]

8@nx._dispatch

9def hits(G, max_iter=100, tol=1.0e-8, nstart=None, normalized=True):

10 """Returns HITS hubs and authorities values for nodes.

12 The HITS algorithm computes two numbers for a node.

13 Authorities estimates the node value based on the incoming links.

14 Hubs estimates the node value based on outgoing links.

16 Parameters

17 ----------

18 G : graph

19 A NetworkX graph

21 max_iter : integer, optional

22 Maximum number of iterations in power method.

24 tol : float, optional

25 Error tolerance used to check convergence in power method iteration.

27 nstart : dictionary, optional

28 Starting value of each node for power method iteration.

30 normalized : bool (default=True)

31 Normalize results by the sum of all of the values.

33 Returns

34 -------

35 (hubs,authorities) : two-tuple of dictionaries

36 Two dictionaries keyed by node containing the hub and authority

37 values.

39 Raises

40 ------

41 PowerIterationFailedConvergence

42 If the algorithm fails to converge to the specified tolerance

43 within the specified number of iterations of the power iteration

44 method.

46 Examples

47 --------

48 >>> G = nx.path_graph(4)

49 >>> h, a = nx.hits(G)

51 Notes

52 -----

53 The eigenvector calculation is done by the power iteration method

54 and has no guarantee of convergence. The iteration will stop

55 after max_iter iterations or an error tolerance of

56 number_of_nodes(G)*tol has been reached.

58 The HITS algorithm was designed for directed graphs but this

59 algorithm does not check if the input graph is directed and will

60 execute on undirected graphs.

62 References

63 ----------

64 .. [1] A. Langville and C. Meyer,

65 "A survey of eigenvector methods of web information retrieval."

66 http://citeseer.ist.psu.edu/713792.html

67 .. [2] Jon Kleinberg,

68 Authoritative sources in a hyperlinked environment

69 Journal of the ACM 46 (5): 604-32, 1999.

70 doi:10.1145/324133.324140.

71 http://www.cs.cornell.edu/home/kleinber/auth.pdf.

72 """

73 import numpy as np

74 import scipy as sp

76 if len(G) == 0:

77 return {}, {}

78 A = nx.adjacency_matrix(G, nodelist=list(G), dtype=float)

80 if nstart is None:

81 _, _, vt = sp.sparse.linalg.svds(A, k=1, maxiter=max_iter, tol=tol)

82 else:

83 nstart = np.array(list(nstart.values()))

84 _, _, vt = sp.sparse.linalg.svds(A, k=1, v0=nstart, maxiter=max_iter, tol=tol)

86 a = vt.flatten().real

87 h = A @ a

88 if normalized:

89 h /= h.sum()

90 a /= a.sum()

91 hubs = dict(zip(G, map(float, h)))

92 authorities = dict(zip(G, map(float, a)))

93 return hubs, authorities

96def _hits_python(G, max_iter=100, tol=1.0e-8, nstart=None, normalized=True):

97 if isinstance(G, (nx.MultiGraph, nx.MultiDiGraph)):

98 raise Exception("hits() not defined for graphs with multiedges.")

99 if len(G) == 0:

100 return {}, {}

101 # choose fixed starting vector if not given

102 if nstart is None:

103 h = dict.fromkeys(G, 1.0 / G.number_of_nodes())

104 else:

105 h = nstart

106 # normalize starting vector

107 s = 1.0 / sum(h.values())

108 for k in h:

109 h[k] *= s

110 for _ in range(max_iter): # power iteration: make up to max_iter iterations

111 hlast = h

112 h = dict.fromkeys(hlast.keys(), 0)

113 a = dict.fromkeys(hlast.keys(), 0)

114 # this "matrix multiply" looks odd because it is

115 # doing a left multiply a^T=hlast^T*G

116 for n in h:

117 for nbr in G[n]:

118 a[nbr] += hlast[n] * G[n][nbr].get("weight", 1)

119 # now multiply h=Ga

120 for n in h:

121 for nbr in G[n]:

122 h[n] += a[nbr] * G[n][nbr].get("weight", 1)

123 # normalize vector

124 s = 1.0 / max(h.values())

125 for n in h:

126 h[n] *= s

127 # normalize vector

128 s = 1.0 / max(a.values())

129 for n in a:

130 a[n] *= s

131 # check convergence, l1 norm

132 err = sum(abs(h[n] - hlast[n]) for n in h)

133 if err < tol:

134 break

135 else:

136 raise nx.PowerIterationFailedConvergence(max_iter)

137 if normalized:

138 s = 1.0 / sum(a.values())

139 for n in a:

140 a[n] *= s

141 s = 1.0 / sum(h.values())

142 for n in h:

143 h[n] *= s

144 return h, a

145

146

147def _hits_numpy(G, normalized=True):

148 """Returns HITS hubs and authorities values for nodes.

149

150 The HITS algorithm computes two numbers for a node.

151 Authorities estimates the node value based on the incoming links.

152 Hubs estimates the node value based on outgoing links.

153

154 Parameters

155 ----------

156 G : graph

157 A NetworkX graph

158

159 normalized : bool (default=True)

160 Normalize results by the sum of all of the values.

161

162 Returns

163 -------

164 (hubs,authorities) : two-tuple of dictionaries

165 Two dictionaries keyed by node containing the hub and authority

166 values.

167

168 Examples

169 --------

170 >>> G = nx.path_graph(4)

171

172 The `hubs` and `authorities` are given by the eigenvectors corresponding to the

173 maximum eigenvalues of the hubs_matrix and the authority_matrix, respectively.

174

175 The ``hubs`` and ``authority`` matrices are computed from the adjacency

176 matrix:

177

178 >>> adj_ary = nx.to_numpy_array(G)

179 >>> hubs_matrix = adj_ary @ adj_ary.T

180 >>> authority_matrix = adj_ary.T @ adj_ary

181

182 `_hits_numpy` maps the eigenvector corresponding to the maximum eigenvalue

183 of the respective matrices to the nodes in `G`:

184

185 >>> from networkx.algorithms.link_analysis.hits_alg import _hits_numpy

186 >>> hubs, authority = _hits_numpy(G)

187

188 Notes

189 -----

190 The eigenvector calculation uses NumPy's interface to LAPACK.

191

192 The HITS algorithm was designed for directed graphs but this

193 algorithm does not check if the input graph is directed and will

194 execute on undirected graphs.

195

196 References

197 ----------

198 .. [1] A. Langville and C. Meyer,

199 "A survey of eigenvector methods of web information retrieval."

200 http://citeseer.ist.psu.edu/713792.html

201 .. [2] Jon Kleinberg,

202 Authoritative sources in a hyperlinked environment

203 Journal of the ACM 46 (5): 604-32, 1999.

204 doi:10.1145/324133.324140.

205 http://www.cs.cornell.edu/home/kleinber/auth.pdf.

206 """

207 import numpy as np

208

209 if len(G) == 0:

210 return {}, {}

211 adj_ary = nx.to_numpy_array(G)

212 # Hub matrix

213 H = adj_ary @ adj_ary.T

214 e, ev = np.linalg.eig(H)

215 h = ev[:, np.argmax(e)] # eigenvector corresponding to the maximum eigenvalue

216 # Authority matrix

217 A = adj_ary.T @ adj_ary

218 e, ev = np.linalg.eig(A)

219 a = ev[:, np.argmax(e)] # eigenvector corresponding to the maximum eigenvalue

220 if normalized:

221 h /= h.sum()

222 a /= a.sum()

223 else:

224 h /= h.max()

225 a /= a.max()

226 hubs = dict(zip(G, map(float, h)))

227 authorities = dict(zip(G, map(float, a)))

228 return hubs, authorities

229

230

231def _hits_scipy(G, max_iter=100, tol=1.0e-6, nstart=None, normalized=True):

232 """Returns HITS hubs and authorities values for nodes.

233

234

235 The HITS algorithm computes two numbers for a node.

236 Authorities estimates the node value based on the incoming links.

237 Hubs estimates the node value based on outgoing links.

238

239 Parameters

240 ----------

241 G : graph

242 A NetworkX graph

243

244 max_iter : integer, optional

245 Maximum number of iterations in power method.

246

247 tol : float, optional

248 Error tolerance used to check convergence in power method iteration.

249

250 nstart : dictionary, optional

251 Starting value of each node for power method iteration.

252

253 normalized : bool (default=True)

254 Normalize results by the sum of all of the values.

255

256 Returns

257 -------

258 (hubs,authorities) : two-tuple of dictionaries

259 Two dictionaries keyed by node containing the hub and authority

260 values.

261

262 Examples

263 --------

264 >>> from networkx.algorithms.link_analysis.hits_alg import _hits_scipy

265 >>> G = nx.path_graph(4)

266 >>> h, a = _hits_scipy(G)

267

268 Notes

269 -----

270 This implementation uses SciPy sparse matrices.

271

272 The eigenvector calculation is done by the power iteration method

273 and has no guarantee of convergence. The iteration will stop

274 after max_iter iterations or an error tolerance of

275 number_of_nodes(G)*tol has been reached.

276

277 The HITS algorithm was designed for directed graphs but this

278 algorithm does not check if the input graph is directed and will

279 execute on undirected graphs.

280

281 Raises

282 ------

283 PowerIterationFailedConvergence

284 If the algorithm fails to converge to the specified tolerance

285 within the specified number of iterations of the power iteration

286 method.

287

288 References

289 ----------

290 .. [1] A. Langville and C. Meyer,

291 "A survey of eigenvector methods of web information retrieval."

292 http://citeseer.ist.psu.edu/713792.html

293 .. [2] Jon Kleinberg,

294 Authoritative sources in a hyperlinked environment

295 Journal of the ACM 46 (5): 604-632, 1999.

296 doi:10.1145/324133.324140.

297 http://www.cs.cornell.edu/home/kleinber/auth.pdf.

298 """

299 import numpy as np

300

301 if len(G) == 0:

302 return {}, {}

303 A = nx.to_scipy_sparse_array(G, nodelist=list(G))

304 (n, _) = A.shape # should be square

305 ATA = A.T @ A # authority matrix

306 # choose fixed starting vector if not given

307 if nstart is None:

308 x = np.ones((n, 1)) / n

309 else:

310 x = np.array([nstart.get(n, 0) for n in list(G)], dtype=float)

311 x /= x.sum()

312

313 # power iteration on authority matrix

314 i = 0

315 while True:

316 xlast = x

317 x = ATA @ x

318 x /= x.max()

319 # check convergence, l1 norm

320 err = np.absolute(x - xlast).sum()

321 if err < tol:

322 break

323 if i > max_iter:

324 raise nx.PowerIterationFailedConvergence(max_iter)

325 i += 1

326

327 a = x.flatten()

328 h = A @ a

329 if normalized:

330 h /= h.sum()

331 a /= a.sum()

332 hubs = dict(zip(G, map(float, h)))

333 authorities = dict(zip(G, map(float, a)))

334 return hubs, authorities

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/networkx/algorithms/link_analysis/hits_alg.py: 6%

108 statements