Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/networkx/algorithms/link_analysis/hits

1"""Hubs and authorities analysis of graph structure."""

3import networkx as nx

5__all__ = ["hits"]

8@nx._dispatchable(preserve_edge_attrs={"G": {"weight": 1}})

9def hits(G, max_iter=100, tol=1.0e-8, nstart=None, normalized=True):

10 """Returns HITS hubs and authorities values for nodes.

12 The HITS algorithm computes two numbers for a node.

13 Authorities estimates the node value based on the incoming links.

14 Hubs estimates the node value based on outgoing links.

16 Parameters

17 ----------

18 G : graph

19 A NetworkX graph

21 max_iter : integer, optional

22 Maximum number of iterations in power method.

24 tol : float, optional

25 Error tolerance used to check convergence in power method iteration.

27 nstart : dictionary, optional

28 Starting value of each node for power method iteration.

30 normalized : bool (default=True)

31 Normalize results by the sum of all of the values.

33 Returns

34 -------

35 (hubs,authorities) : two-tuple of dictionaries

36 Two dictionaries keyed by node containing the hub and authority

37 values.

39 Raises

40 ------

41 PowerIterationFailedConvergence

42 If the algorithm fails to converge to the specified tolerance

43 within the specified number of iterations of the power iteration

44 method.

46 Examples

47 --------

48 >>> G = nx.path_graph(4)

49 >>> h, a = nx.hits(G)

51 Notes

52 -----

53 The eigenvector calculation is done by the power iteration method

54 and has no guarantee of convergence. The iteration will stop

55 after max_iter iterations or an error tolerance of

56 number_of_nodes(G)*tol has been reached.

58 The HITS algorithm was designed for directed graphs but this

59 algorithm does not check if the input graph is directed and will

60 execute on undirected graphs.

62 References

63 ----------

64 .. [1] A. Langville and C. Meyer,

65 "A survey of eigenvector methods of web information retrieval."

66 https://epubs.siam.org/doi/epdf/10.1137/S0036144503424786

67 .. [2] Jon Kleinberg,

68 Authoritative sources in a hyperlinked environment

69 Journal of the ACM 46 (5): 604-32, 1999.

70 https://www.cs.cornell.edu/home/kleinber/auth.pdf

71 doi:10.1145/324133.324140.

72 """

73 import numpy as np

74 import scipy as sp

76 if len(G) == 0:

77 return {}, {}

78 A = nx.adjacency_matrix(G, nodelist=list(G), dtype=float)

80 if nstart is not None:

81 nstart = np.array(list(nstart.values()))

82 if max_iter <= 0:

83 raise nx.PowerIterationFailedConvergence(max_iter)

84 try:

85 _, _, vt = sp.sparse.linalg.svds(A, k=1, v0=nstart, maxiter=max_iter, tol=tol)

86 except sp.sparse.linalg.ArpackNoConvergence as exc:

87 raise nx.PowerIterationFailedConvergence(max_iter) from exc

89 a = vt.flatten().real

90 h = A @ a

91 if normalized:

92 h /= h.sum()

93 a /= a.sum()

94 hubs = dict(zip(G, map(float, h)))

95 authorities = dict(zip(G, map(float, a)))

96 return hubs, authorities

99def _hits_python(G, max_iter=100, tol=1.0e-8, nstart=None, normalized=True):

100 if isinstance(G, nx.MultiGraph | nx.MultiDiGraph):

101 raise Exception("hits() not defined for graphs with multiedges.")

102 if len(G) == 0:

103 return {}, {}

104 # choose fixed starting vector if not given

105 if nstart is None:

106 h = dict.fromkeys(G, 1.0 / G.number_of_nodes())

107 else:

108 h = nstart

109 # normalize starting vector

110 s = 1.0 / sum(h.values())

111 for k in h:

112 h[k] *= s

113 for _ in range(max_iter): # power iteration: make up to max_iter iterations

114 hlast = h

115 h = dict.fromkeys(hlast.keys(), 0)

116 a = dict.fromkeys(hlast.keys(), 0)

117 # this "matrix multiply" looks odd because it is

118 # doing a left multiply a^T=hlast^T*G

119 for n in h:

120 for nbr in G[n]:

121 a[nbr] += hlast[n] * G[n][nbr].get("weight", 1)

122 # now multiply h=Ga

123 for n in h:

124 for nbr in G[n]:

125 h[n] += a[nbr] * G[n][nbr].get("weight", 1)

126 # normalize vector

127 s = 1.0 / max(h.values())

128 for n in h:

129 h[n] *= s

130 # normalize vector

131 s = 1.0 / max(a.values())

132 for n in a:

133 a[n] *= s

134 # check convergence, l1 norm

135 err = sum(abs(h[n] - hlast[n]) for n in h)

136 if err < tol:

137 break

138 else:

139 raise nx.PowerIterationFailedConvergence(max_iter)

140 if normalized:

141 s = 1.0 / sum(a.values())

142 for n in a:

143 a[n] *= s

144 s = 1.0 / sum(h.values())

145 for n in h:

146 h[n] *= s

147 return h, a

148

149

150def _hits_numpy(G, normalized=True):

151 """Returns HITS hubs and authorities values for nodes.

152

153 The HITS algorithm computes two numbers for a node.

154 Authorities estimates the node value based on the incoming links.

155 Hubs estimates the node value based on outgoing links.

156

157 Parameters

158 ----------

159 G : graph

160 A NetworkX graph

161

162 normalized : bool (default=True)

163 Normalize results by the sum of all of the values.

164

165 Returns

166 -------

167 (hubs,authorities) : two-tuple of dictionaries

168 Two dictionaries keyed by node containing the hub and authority

169 values.

170

171 Examples

172 --------

173 >>> G = nx.path_graph(4)

174

175 The `hubs` and `authorities` are given by the eigenvectors corresponding to the

176 maximum eigenvalues of the hubs_matrix and the authority_matrix, respectively.

177

178 The ``hubs`` and ``authority`` matrices are computed from the adjacency

179 matrix:

180

181 >>> adj_ary = nx.to_numpy_array(G)

182 >>> hubs_matrix = adj_ary @ adj_ary.T

183 >>> authority_matrix = adj_ary.T @ adj_ary

184

185 `_hits_numpy` maps the eigenvector corresponding to the maximum eigenvalue

186 of the respective matrices to the nodes in `G`:

187

188 >>> from networkx.algorithms.link_analysis.hits_alg import _hits_numpy

189 >>> hubs, authority = _hits_numpy(G)

190

191 Notes

192 -----

193 The eigenvector calculation uses NumPy's interface to LAPACK.

194

195 The HITS algorithm was designed for directed graphs but this

196 algorithm does not check if the input graph is directed and will

197 execute on undirected graphs.

198

199 References

200 ----------

201 .. [1] A. Langville and C. Meyer,

202 "A survey of eigenvector methods of web information retrieval."

203 http://citeseer.ist.psu.edu/713792.html

204 .. [2] Jon Kleinberg,

205 Authoritative sources in a hyperlinked environment

206 Journal of the ACM 46 (5): 604-32, 1999.

207 doi:10.1145/324133.324140.

208 http://www.cs.cornell.edu/home/kleinber/auth.pdf.

209 """

210 import numpy as np

211

212 if len(G) == 0:

213 return {}, {}

214 adj_ary = nx.to_numpy_array(G)

215 # Hub matrix

216 H = adj_ary @ adj_ary.T

217 e, ev = np.linalg.eig(H)

218 h = ev[:, np.argmax(e)] # eigenvector corresponding to the maximum eigenvalue

219 # Authority matrix

220 A = adj_ary.T @ adj_ary

221 e, ev = np.linalg.eig(A)

222 a = ev[:, np.argmax(e)] # eigenvector corresponding to the maximum eigenvalue

223 if normalized:

224 h /= h.sum()

225 a /= a.sum()

226 else:

227 h /= h.max()

228 a /= a.max()

229 hubs = dict(zip(G, map(float, h)))

230 authorities = dict(zip(G, map(float, a)))

231 return hubs, authorities

232

233

234def _hits_scipy(G, max_iter=100, tol=1.0e-6, nstart=None, normalized=True):

235 """Returns HITS hubs and authorities values for nodes.

236

237

238 The HITS algorithm computes two numbers for a node.

239 Authorities estimates the node value based on the incoming links.

240 Hubs estimates the node value based on outgoing links.

241

242 Parameters

243 ----------

244 G : graph

245 A NetworkX graph

246

247 max_iter : integer, optional

248 Maximum number of iterations in power method.

249

250 tol : float, optional

251 Error tolerance used to check convergence in power method iteration.

252

253 nstart : dictionary, optional

254 Starting value of each node for power method iteration.

255

256 normalized : bool (default=True)

257 Normalize results by the sum of all of the values.

258

259 Returns

260 -------

261 (hubs,authorities) : two-tuple of dictionaries

262 Two dictionaries keyed by node containing the hub and authority

263 values.

264

265 Examples

266 --------

267 >>> from networkx.algorithms.link_analysis.hits_alg import _hits_scipy

268 >>> G = nx.path_graph(4)

269 >>> h, a = _hits_scipy(G)

270

271 Notes

272 -----

273 This implementation uses SciPy sparse matrices.

274

275 The eigenvector calculation is done by the power iteration method

276 and has no guarantee of convergence. The iteration will stop

277 after max_iter iterations or an error tolerance of

278 number_of_nodes(G)*tol has been reached.

279

280 The HITS algorithm was designed for directed graphs but this

281 algorithm does not check if the input graph is directed and will

282 execute on undirected graphs.

283

284 Raises

285 ------

286 PowerIterationFailedConvergence

287 If the algorithm fails to converge to the specified tolerance

288 within the specified number of iterations of the power iteration

289 method.

290

291 References

292 ----------

293 .. [1] A. Langville and C. Meyer,

294 "A survey of eigenvector methods of web information retrieval."

295 http://citeseer.ist.psu.edu/713792.html

296 .. [2] Jon Kleinberg,

297 Authoritative sources in a hyperlinked environment

298 Journal of the ACM 46 (5): 604-632, 1999.

299 doi:10.1145/324133.324140.

300 http://www.cs.cornell.edu/home/kleinber/auth.pdf.

301 """

302 import numpy as np

303

304 if len(G) == 0:

305 return {}, {}

306 A = nx.to_scipy_sparse_array(G, nodelist=list(G))

307 (n, _) = A.shape # should be square

308 ATA = A.T @ A # authority matrix

309 # choose fixed starting vector if not given

310 if nstart is None:

311 x = np.ones((n, 1)) / n

312 else:

313 x = np.array([nstart.get(n, 0) for n in list(G)], dtype=float)

314 x /= x.sum()

315

316 # power iteration on authority matrix

317 i = 0

318 while True:

319 xlast = x

320 x = ATA @ x

321 x /= x.max()

322 # check convergence, l1 norm

323 err = np.absolute(x - xlast).sum()

324 if err < tol:

325 break

326 if i > max_iter:

327 raise nx.PowerIterationFailedConvergence(max_iter)

328 i += 1

329

330 a = x.flatten()

331 h = A @ a

332 if normalized:

333 h /= h.sum()

334 a /= a.sum()

335 hubs = dict(zip(G, map(float, h)))

336 authorities = dict(zip(G, map(float, a)))

337 return hubs, authorities

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/networkx/algorithms/link_analysis/hits_alg.py: 7%

114 statements