"""PageRank analysis of graph structure."""
from warnings import warn

import networkx as nx

__all__ = ["pagerank", "google_matrix"]


@nx._dispatch(edge_attrs="weight")
def pagerank(
    G,
    alpha=0.85,
    personalization=None,
    max_iter=100,
    tol=1.0e-6,
    nstart=None,
    weight="weight",
    dangling=None,
):
    """Returns the PageRank of the nodes in the graph.

    PageRank computes a ranking of the nodes in the graph G based on
    the structure of the incoming links. It was originally designed as
    an algorithm to rank web pages.

    Parameters
    ----------
    G : graph
      A NetworkX graph. Undirected graphs will be converted to a directed
      graph with two directed edges for each undirected edge.

    alpha : float, optional
      Damping parameter for PageRank, default=0.85.

    personalization : dict, optional
      The "personalization vector", given as a dictionary keyed by a subset
      of graph nodes with the personalization value for each of those nodes.
      At least one personalization value must be non-zero; nodes missing
      from the dictionary are assigned a personalization value of zero.
      If not specified, a uniform distribution over all nodes is used.

    max_iter : integer, optional
      Maximum number of iterations in power method eigenvalue solver.

    tol : float, optional
      Error tolerance used to check convergence in power method solver.
      The iteration stops once the total change per iteration (l1 norm)
      falls below ``len(G) * tol``.

    nstart : dictionary, optional
      Starting value of PageRank iteration for each node.

    weight : key, optional
      Edge data key to use as weight. If None, weights are set to 1.

    dangling : dict, optional
      The outedges to be assigned to any "dangling" nodes, i.e., nodes without
      any outedges. The dict key is the node the outedge points to and the dict
      value is the weight of that outedge. By default, dangling nodes are given
      outedges according to the personalization vector (uniform if not
      specified). This must be selected to result in an irreducible transition
      matrix (see notes under google_matrix). It is common for the dangling
      dict to be the same as the personalization dict.

    Returns
    -------
    pagerank : dictionary
      Dictionary of nodes with PageRank as value.

    Examples
    --------
    >>> G = nx.DiGraph(nx.path_graph(4))
    >>> pr = nx.pagerank(G, alpha=0.9)
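
    A ranking biased toward particular nodes (the node labels here are purely
    illustrative) can be obtained through the ``personalization`` argument:

    >>> pr_personalized = nx.pagerank(G, alpha=0.9, personalization={0: 1, 3: 1})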

    Notes
    -----
    The eigenvector calculation is done by the power iteration method
    and has no guarantee of convergence. The iteration will stop after
    an error tolerance of ``len(G) * tol`` has been reached. If the
    number of iterations exceeds `max_iter`, a
    :exc:`networkx.exception.PowerIterationFailedConvergence` exception
    is raised.
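
    Informally, each iteration redistributes the current rank values along the
    stochastically normalized out-edges, re-inserts the rank held by dangling
    nodes according to `dangling`, and mixes in the ``(1 - alpha)`` teleportation
    term from the personalization vector; the private helpers below spell out
    the exact update.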

    The PageRank algorithm was designed for directed graphs but this
    algorithm does not check if the input graph is directed and will
    execute on undirected graphs by converting each undirected edge to
    two directed edges.

    See Also
    --------
    google_matrix

    Raises
    ------
    PowerIterationFailedConvergence
        If the algorithm fails to converge to the specified tolerance
        within the specified number of iterations of the power iteration
        method.

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
       The PageRank citation ranking: Bringing order to the Web. 1999
       http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf

    """
    return _pagerank_scipy(
        G, alpha, personalization, max_iter, tol, nstart, weight, dangling
    )


def _pagerank_python(
    G,
    alpha=0.85,
    personalization=None,
    max_iter=100,
    tol=1.0e-6,
    nstart=None,
    weight="weight",
    dangling=None,
):
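    """Pure-Python (dict-based) power-iteration implementation of PageRank.

    Mirrors the parameters and semantics of `pagerank` without requiring
    NumPy or SciPy.
    """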

    if len(G) == 0:
        return {}

    D = G.to_directed()

    # Create a copy in (right) stochastic form
    W = nx.stochastic_graph(D, weight=weight)
    N = W.number_of_nodes()

    # Choose fixed starting vector if not given
    if nstart is None:
        x = dict.fromkeys(W, 1.0 / N)
    else:
        # Normalized nstart vector
        s = sum(nstart.values())
        x = {k: v / s for k, v in nstart.items()}

    if personalization is None:
        # Assign uniform personalization vector if not given
        p = dict.fromkeys(W, 1.0 / N)
    else:
        s = sum(personalization.values())
        p = {k: v / s for k, v in personalization.items()}

    if dangling is None:
        # Use personalization vector if dangling vector not specified
        dangling_weights = p
    else:
        s = sum(dangling.values())
        dangling_weights = {k: v / s for k, v in dangling.items()}
    dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]

    # power iteration: make up to max_iter iterations
    for _ in range(max_iter):
        xlast = x
        x = dict.fromkeys(xlast.keys(), 0)
        danglesum = alpha * sum(xlast[n] for n in dangling_nodes)
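        # danglesum is the total rank currently sitting on dangling nodes; the
        # loop below redistributes it according to dangling_weights and also
        # adds the (1 - alpha) teleportation share of p to every node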

        for n in x:
            # this matrix multiply looks odd because it is
            # doing a left multiply x^T=xlast^T*W
            for _, nbr, wt in W.edges(n, data=weight):
                x[nbr] += alpha * xlast[n] * wt
            x[n] += danglesum * dangling_weights.get(n, 0) + (1.0 - alpha) * p.get(n, 0)
        # check convergence, l1 norm
        err = sum(abs(x[n] - xlast[n]) for n in x)
        if err < N * tol:
            return x
    raise nx.PowerIterationFailedConvergence(max_iter)


@nx._dispatch(edge_attrs="weight")
def google_matrix(
    G, alpha=0.85, personalization=None, nodelist=None, weight="weight", dangling=None
):
    """Returns the Google matrix of the graph.

    Parameters
    ----------
    G : graph
      A NetworkX graph. Undirected graphs will be converted to a directed
      graph with two directed edges for each undirected edge.

    alpha : float
      The damping factor.

    personalization : dict, optional
      The "personalization vector", given as a dictionary keyed by a subset
      of graph nodes with the personalization value for each of those nodes.
      At least one personalization value must be non-zero; nodes missing
      from the dictionary are assigned a personalization value of zero.
      If not specified, a uniform distribution over all nodes is used.

    nodelist : list, optional
      The rows and columns are ordered according to the nodes in nodelist.
      If nodelist is None, then the ordering is produced by G.nodes().

    weight : key, optional
      Edge data key to use as weight. If None, weights are set to 1.

    dangling : dict, optional
      The outedges to be assigned to any "dangling" nodes, i.e., nodes without
      any outedges. The dict key is the node the outedge points to and the dict
      value is the weight of that outedge. By default, dangling nodes are given
      outedges according to the personalization vector (uniform if not
      specified). This must be selected to result in an irreducible transition
      matrix (see notes below). It is common for the dangling dict to be the
      same as the personalization dict.

    Returns
    -------
    A : 2D NumPy ndarray
      Google matrix of the graph

    Notes
    -----
    The array returned represents the transition matrix that describes the
    Markov chain used in PageRank. For PageRank to converge to a unique
    solution (i.e., a unique stationary distribution in a Markov chain), the
    transition matrix must be irreducible. In other words, it must be that
    there exists a path between every pair of nodes in the graph, or else there
    is the potential of "rank sinks."

    This implementation works with Multi(Di)Graphs. For multigraphs the
    weight between two nodes is set to be the sum of all edge weights
    between those nodes.

    See Also
    --------
    pagerank
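
    Examples
    --------
    The returned array is dense and row-stochastic; the small graph below is
    purely illustrative.

    >>> G = nx.DiGraph(nx.path_graph(4))
    >>> M = nx.google_matrix(G, alpha=0.9)
    >>> M.shape
    (4, 4)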

    """
    import numpy as np

    if nodelist is None:
        nodelist = list(G)

    A = nx.to_numpy_array(G, nodelist=nodelist, weight=weight)
    N = len(G)
    if N == 0:
        return A

    # Personalization vector
    if personalization is None:
        p = np.repeat(1.0 / N, N)
    else:
        p = np.array([personalization.get(n, 0) for n in nodelist], dtype=float)
        if p.sum() == 0:
            raise ZeroDivisionError
        p /= p.sum()

    # Dangling nodes
    if dangling is None:
        dangling_weights = p
    else:
        # Convert the dangling dictionary into an array in nodelist order
        dangling_weights = np.array([dangling.get(n, 0) for n in nodelist], dtype=float)
        dangling_weights /= dangling_weights.sum()
    dangling_nodes = np.where(A.sum(axis=1) == 0)[0]

    # Assign dangling_weights to any dangling nodes (nodes with no out links)
    A[dangling_nodes] = dangling_weights

    A /= A.sum(axis=1)[:, np.newaxis]  # Normalize rows to sum to 1
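    # the Google matrix mixes the normalized link matrix A with the
    # teleportation distribution p: every row is alpha * A[i] + (1 - alpha) * p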

    return alpha * A + (1 - alpha) * p


def _pagerank_numpy(
    G, alpha=0.85, personalization=None, weight="weight", dangling=None
):
    """Returns the PageRank of the nodes in the graph.

    PageRank computes a ranking of the nodes in the graph G based on
    the structure of the incoming links. It was originally designed as
    an algorithm to rank web pages.

    Parameters
    ----------
    G : graph
      A NetworkX graph. Undirected graphs will be converted to a directed
      graph with two directed edges for each undirected edge.

    alpha : float, optional
      Damping parameter for PageRank, default=0.85.

    personalization : dict, optional
      The "personalization vector", given as a dictionary keyed by a subset
      of graph nodes with the personalization value for each of those nodes.
      At least one personalization value must be non-zero; nodes missing
      from the dictionary are assigned a personalization value of zero.
      If not specified, a uniform distribution over all nodes is used.

    weight : key, optional
      Edge data key to use as weight. If None, weights are set to 1.

    dangling : dict, optional
      The outedges to be assigned to any "dangling" nodes, i.e., nodes without
      any outedges. The dict key is the node the outedge points to and the dict
      value is the weight of that outedge. By default, dangling nodes are given
      outedges according to the personalization vector (uniform if not
      specified). This must be selected to result in an irreducible transition
      matrix (see notes under google_matrix). It is common for the dangling
      dict to be the same as the personalization dict.

    Returns
    -------
    pagerank : dictionary
      Dictionary of nodes with PageRank as value.

    Examples
    --------
    >>> from networkx.algorithms.link_analysis.pagerank_alg import _pagerank_numpy
    >>> G = nx.DiGraph(nx.path_graph(4))
    >>> pr = _pagerank_numpy(G, alpha=0.9)

    Notes
    -----
    The eigenvector calculation uses NumPy's interface to the LAPACK
    eigenvalue solvers. This will be the fastest and most accurate
    for small graphs.

    This implementation works with Multi(Di)Graphs. For multigraphs the
    weight between two nodes is set to be the sum of all edge weights
    between those nodes.

    See Also
    --------
    pagerank, google_matrix

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
       The PageRank citation ranking: Bringing order to the Web. 1999
       http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
    """
    import numpy as np

    if len(G) == 0:
        return {}
    M = google_matrix(
        G, alpha, personalization=personalization, weight=weight, dangling=dangling
    )
    # use numpy LAPACK solver
    eigenvalues, eigenvectors = np.linalg.eig(M.T)
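    # M is row-stochastic, so the dominant eigenvalue of M.T is 1; the matching
    # eigenvector, rescaled to sum to 1, is the stationary PageRank distribution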

    ind = np.argmax(eigenvalues)
    # eigenvector of largest eigenvalue is at ind, normalized
    largest = np.array(eigenvectors[:, ind]).flatten().real
    norm = largest.sum()
    return dict(zip(G, map(float, largest / norm)))


def _pagerank_scipy(
    G,
    alpha=0.85,
    personalization=None,
    max_iter=100,
    tol=1.0e-6,
    nstart=None,
    weight="weight",
    dangling=None,
):
    """Returns the PageRank of the nodes in the graph.

    PageRank computes a ranking of the nodes in the graph G based on
    the structure of the incoming links. It was originally designed as
    an algorithm to rank web pages.

    Parameters
    ----------
    G : graph
      A NetworkX graph. Undirected graphs will be converted to a directed
      graph with two directed edges for each undirected edge.

    alpha : float, optional
      Damping parameter for PageRank, default=0.85.

    personalization : dict, optional
      The "personalization vector", given as a dictionary keyed by a subset
      of graph nodes with the personalization value for each of those nodes.
      At least one personalization value must be non-zero; nodes missing
      from the dictionary are assigned a personalization value of zero.
      If not specified, a uniform distribution over all nodes is used.

    max_iter : integer, optional
      Maximum number of iterations in power method eigenvalue solver.

    tol : float, optional
      Error tolerance used to check convergence in power method solver.
      The iteration stops once the total change per iteration (l1 norm)
      falls below ``len(G) * tol``.

    nstart : dictionary, optional
      Starting value of PageRank iteration for each node.

    weight : key, optional
      Edge data key to use as weight. If None, weights are set to 1.

    dangling : dict, optional
      The outedges to be assigned to any "dangling" nodes, i.e., nodes without
      any outedges. The dict key is the node the outedge points to and the dict
      value is the weight of that outedge. By default, dangling nodes are given
      outedges according to the personalization vector (uniform if not
      specified). This must be selected to result in an irreducible transition
      matrix (see notes under google_matrix). It is common for the dangling
      dict to be the same as the personalization dict.

    Returns
    -------
    pagerank : dictionary
      Dictionary of nodes with PageRank as value.

    Examples
    --------
    >>> from networkx.algorithms.link_analysis.pagerank_alg import _pagerank_scipy
    >>> G = nx.DiGraph(nx.path_graph(4))
    >>> pr = _pagerank_scipy(G, alpha=0.9)

    Notes
    -----
    The eigenvector calculation uses power iteration with a SciPy
    sparse matrix representation.

    This implementation works with Multi(Di)Graphs. For multigraphs the
    weight between two nodes is set to be the sum of all edge weights
    between those nodes.

    See Also
    --------
    pagerank

    Raises
    ------
    PowerIterationFailedConvergence
        If the algorithm fails to converge to the specified tolerance
        within the specified number of iterations of the power iteration
        method.

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
       The PageRank citation ranking: Bringing order to the Web. 1999
       http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
    """
    import numpy as np
    import scipy as sp

    N = len(G)
    if N == 0:
        return {}

    nodelist = list(G)
    A = nx.to_scipy_sparse_array(G, nodelist=nodelist, weight=weight, dtype=float)
    S = A.sum(axis=1)
    S[S != 0] = 1.0 / S[S != 0]
    # TODO: csr_array
    Q = sp.sparse.csr_array(sp.sparse.spdiags(S.T, 0, *A.shape))
    A = Q @ A
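    # A is now the row-stochastic transition matrix; rows of dangling nodes
    # stay all-zero here and are handled through dangling_weights below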

    # initial vector
    if nstart is None:
        x = np.repeat(1.0 / N, N)
    else:
        x = np.array([nstart.get(n, 0) for n in nodelist], dtype=float)
        x /= x.sum()

    # Personalization vector
    if personalization is None:
        p = np.repeat(1.0 / N, N)
    else:
        p = np.array([personalization.get(n, 0) for n in nodelist], dtype=float)
        if p.sum() == 0:
            raise ZeroDivisionError
        p /= p.sum()
    # Dangling nodes
    if dangling is None:
        dangling_weights = p
    else:
        # Convert the dangling dictionary into an array in nodelist order
        dangling_weights = np.array([dangling.get(n, 0) for n in nodelist], dtype=float)
        dangling_weights /= dangling_weights.sum()
    is_dangling = np.where(S == 0)[0]

    # power iteration: make up to max_iter iterations
    for _ in range(max_iter):
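        # one full PageRank step: push rank along out-edges (x @ A), re-insert
        # the rank sitting on dangling nodes via dangling_weights, then add the
        # (1 - alpha) teleportation term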

        xlast = x
        x = alpha * (x @ A + sum(x[is_dangling]) * dangling_weights) + (1 - alpha) * p
        # check convergence, l1 norm
        err = np.absolute(x - xlast).sum()
        if err < N * tol:
            return dict(zip(nodelist, map(float, x)))
    raise nx.PowerIterationFailedConvergence(max_iter)