import itertools

import networkx as nx

__all__ = ["birank"]


@nx._dispatchable(edge_attrs="weight")
def birank(
    G,
    nodes,
    *,
    alpha=None,
    beta=None,
    top_personalization=None,
    bottom_personalization=None,
    max_iter=100,
    tol=1.0e-6,
    weight="weight",
):
    r"""Compute the BiRank score for nodes in a bipartite network.

    Given the bipartite sets $U$ and $P$, the BiRank algorithm seeks to satisfy
    the following recursive relationships between the scores of nodes $j \in P$
    and $i \in U$:

    .. math::

        p_j = \alpha \sum_{i \in U} \frac{w_{ij}}{\sqrt{d_i}\sqrt{d_j}} u_i
        + (1 - \alpha) p_j^0

        u_i = \beta \sum_{j \in P} \frac{w_{ij}}{\sqrt{d_i}\sqrt{d_j}} p_j
        + (1 - \beta) u_i^0

    where

    * $p_j$ and $u_i$ are the BiRank scores of nodes $j \in P$ and $i \in U$.
    * $w_{ij}$ is the weight of the edge between nodes $i \in U$ and $j \in P$
      (with a value of 0 if no edge exists).
    * $d_i$ and $d_j$ are the weighted degrees of nodes $i \in U$ and $j \in P$,
      respectively.
    * $p_j^0$ and $u_i^0$ are personalization values that can encode a priori
      weights for the nodes $j \in P$ and $i \in U$, respectively, akin to the
      personalization vector used by PageRank.
    * $\alpha$ and $\beta$ are damping hyperparameters that apply to nodes in $P$
      and $U$, respectively. They can take values in the interval $[0, 1]$, and
      are analogous to those used by PageRank.

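    Equivalently, collecting the scores into vectors $\mathbf{p}$ and
    $\mathbf{u}$, the two updates above can be written in matrix form. Here
    $W$ is the $|U| \times |P|$ weighted biadjacency matrix and $D_U$, $D_P$
    are the diagonal matrices of weighted degrees; these symbols are introduced
    only for this restatement and correspond to the matrices built by the
    implementation below:

    .. math::

        \mathbf{p} = \alpha S^{\top} \mathbf{u} + (1 - \alpha) \mathbf{p}^0,
        \qquad
        \mathbf{u} = \beta S \mathbf{p} + (1 - \beta) \mathbf{u}^0,
        \qquad
        \text{where } S = D_U^{-1/2} W D_P^{-1/2}.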

    Below are two use cases for this algorithm.

    1. Personalized Recommendation System
        Given a bipartite graph representing users and items, BiRank can be used
        as a collaborative filtering algorithm to recommend items to users.
        Previous ratings are encoded as edge weights, and the specific ratings
        of an individual user on a set of items are used as the personalization
        vector over items. See the example below for an implementation of this
        on a toy dataset provided in [1]_.

    2. Popularity Prediction
        Given a bipartite graph representing user interactions with items, e.g.
        commits to a GitHub repository, BiRank can be used to predict the
        popularity of a given item. Edge weights should encode the strength of
        the interaction signal. This could be a raw count, or weighted by a time
        decay function like that specified in Eq. (15) of [1]_. The
        personalization vectors can be used to encode existing popularity
        signals, for example, the monthly download count of a repository's
        package. A minimal sketch of this setup appears at the end of the
        Examples section below.

    Parameters
    ----------
    G : graph
        A bipartite network

    nodes : iterable of nodes
        Container with all nodes belonging to the first bipartite node set
        ('top'). The nodes in this set use the hyperparameter `alpha`, and the
        personalization dictionary `top_personalization`. The nodes in the second
        bipartite node set ('bottom') are automatically determined by taking the
        complement of 'top' with respect to the graph `G`.

    alpha : float, optional (default=0.80 if top_personalization not empty, else 1)
        Damping factor for the 'top' nodes. Must be in the interval $[0, 1]$.
        Larger alpha and beta generally reduce the effect of the personalizations
        and increase the number of iterations before convergence. Choice of value
        is largely dependent on use case, and experimentation is recommended.

    beta : float, optional (default=0.80 if bottom_personalization not empty, else 1)
        Damping factor for the 'bottom' nodes. Must be in the interval $[0, 1]$.
        Larger alpha and beta generally reduce the effect of the personalizations
        and increase the number of iterations before convergence. Choice of value
        is largely dependent on use case, and experimentation is recommended.

    top_personalization : dict, optional (default=None)
        Dictionary keyed by nodes in 'top' to that node's personalization value.
        Unspecified nodes in 'top' will be assigned a personalization value of 0.
        Personalization values are used to encode a priori weights for a given node,
        and should be non-negative.

    bottom_personalization : dict, optional (default=None)
        Dictionary keyed by nodes in 'bottom' to that node's personalization value.
        Unspecified nodes in 'bottom' will be assigned a personalization value of 0.
        Personalization values are used to encode a priori weights for a given node,
        and should be non-negative.

    max_iter : int, optional (default=100)
        Maximum number of iterations in power method eigenvalue solver.

    tol : float, optional (default=1.0e-6)
        Error tolerance used to check convergence in power method solver. The
        iteration stops once the accumulated error falls below ``len(top) * tol``
        for the nodes in 'top' and below ``len(bottom) * tol`` for the nodes in
        'bottom'.

    weight : string or None, optional (default='weight')
        Edge data key to use as weight.

    Returns
    -------
    birank : dictionary
        Dictionary keyed by node to that node's BiRank score.

    Raises
    ------
    NetworkXAlgorithmError
        If the parameters `alpha` or `beta` are not in the interval [0, 1],
        if either of the bipartite sets is empty, or if negative values are
        provided in the personalization dictionaries.

    PowerIterationFailedConvergence
        If the algorithm fails to converge to the specified tolerance
        within the specified number of iterations of the power iteration
        method.

    Examples
    --------
    Construct a bipartite graph with user-item ratings and use BiRank to
    recommend items to a user (user 1). The example below uses the `rating`
    edge attribute as the weight of the edges. The `top_personalization` vector
    is used to encode the user's previous ratings on items.

    Create the graph and bipartite sets for the example.

    >>> elist = [
    ...     ("u1", "p1", 5),
    ...     ("u2", "p1", 5),
    ...     ("u2", "p2", 4),
    ...     ("u3", "p1", 3),
    ...     ("u3", "p3", 2),
    ... ]
    >>> G = nx.Graph()
    >>> G.add_weighted_edges_from(elist, weight="rating")
    >>> product_nodes = ("p1", "p2", "p3")
    >>> user = "u1"

    First, we create a personalization vector for the user based on their
    ratings of past items. In this case they have only rated one item (p1, with
    a rating of 5) in the past.

    >>> user_personalization = {
    ...     product: rating
    ...     for _, product, rating in G.edges(nbunch=user, data="rating")
    ... }
    >>> user_personalization
    {'p1': 5}

    Calculate the BiRank score of all nodes in the graph, filter for the items
    that the user has not rated yet, and sort the results by score.

    >>> user_birank_results = nx.bipartite.birank(
    ...     G, product_nodes, top_personalization=user_personalization, weight="rating"
    ... )
    >>> user_birank_results = filter(
    ...     lambda item: item[0][0] == "p" and user not in G.neighbors(item[0]),
    ...     user_birank_results.items(),
    ... )
    >>> user_birank_results = sorted(
    ...     user_birank_results, key=lambda item: item[1], reverse=True
    ... )
    >>> user_recommendations = {
    ...     product: round(score, 5) for product, score in user_birank_results
    ... }
    >>> user_recommendations
    {'p2': 1.44818, 'p3': 1.04811}

    We find that user 1 should be recommended item p2 over item p3. This is
    because user 2 also rated p1 highly, while user 3 did not. Thus user 2's
    tastes are inferred to be similar to user 1's, and carry more weight in
    the recommendation.

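    The popularity-prediction use case described above follows the same
    pattern. The sketch below is purely illustrative (the node names, edge
    weights, and download counts are invented); an external popularity signal
    for the repositories is supplied through `top_personalization`.

    >>> H = nx.Graph()
    >>> H.add_weighted_edges_from(
    ...     [("dev1", "repo1", 3), ("dev2", "repo1", 1), ("dev2", "repo2", 4)]
    ... )
    >>> downloads = {"repo1": 120, "repo2": 30}
    >>> repo_scores = nx.bipartite.birank(
    ...     H, ("repo1", "repo2"), top_personalization=downloads
    ... )
    >>> set(repo_scores) == set(H)
    True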

    See Also
    --------
    :func:`~networkx.algorithms.link_analysis.pagerank_alg.pagerank`
    :func:`~networkx.algorithms.link_analysis.hits_alg.hits`
    :func:`~networkx.algorithms.bipartite.centrality.betweenness_centrality`
    :func:`~networkx.algorithms.bipartite.basic.sets`
    :func:`~networkx.algorithms.bipartite.basic.is_bipartite`

    Notes
    -----
    The `nodes` input parameter must contain all nodes in one bipartite
    node set, but the dictionary returned contains all nodes from both
    bipartite node sets. See :mod:`bipartite documentation
    <networkx.algorithms.bipartite>` for further details on how
    bipartite graphs are handled in NetworkX.

    If a personalization dictionary is not provided for top (bottom), `alpha`
    (`beta`) will default to 1. This is because a damping factor paired with an
    all-zero personalization vector will lead the algorithm to converge to the
    zero vector.

    References
    ----------
    .. [1] Xiangnan He, Ming Gao, Min-Yen Kan, and Dingxian Wang. 2017.
       BiRank: Towards Ranking on Bipartite Graphs. IEEE Trans. on Knowl.
       and Data Eng. 29, 1 (January 2017), 57–71.
       https://arxiv.org/pdf/1708.04396
    """
    import numpy as np
    import scipy as sp

    # Initialize the sets of top and bottom nodes
    top = set(nodes)
    bottom = set(G) - top
    top_count = len(top)
    bottom_count = len(bottom)

    if top_count == 0 or bottom_count == 0:
        raise nx.NetworkXAlgorithmError(
            "The BiRank algorithm requires a bipartite graph with at least "
            "one node in each set."
        )

    # Clean the personalization dictionaries
    top_personalization = _clean_personalization_dict(top_personalization)
    bottom_personalization = _clean_personalization_dict(bottom_personalization)

    # Set default values for alpha and beta if not provided
    if alpha is None:
        alpha = 0.8 if top_personalization else 1
    if beta is None:
        beta = 0.8 if bottom_personalization else 1

    if alpha < 0 or alpha > 1:
        raise nx.NetworkXAlgorithmError("alpha must be in the interval [0, 1]")
    if beta < 0 or beta > 1:
        raise nx.NetworkXAlgorithmError("beta must be in the interval [0, 1]")

    # Initialize query vectors
    p0 = np.array([top_personalization.get(n, 0) for n in top], dtype=float)
    u0 = np.array([bottom_personalization.get(n, 0) for n in bottom], dtype=float)

    # Construct degree normalized biadjacency matrix `S` and its transpose
    W = nx.bipartite.biadjacency_matrix(G, bottom, top, weight=weight, dtype=float)
    p_degrees = W.sum(axis=0, dtype=float)
    # Handle case where the node is disconnected - avoids warning
    p_degrees[p_degrees == 0] = 1.0
    D_p = sp.sparse.dia_array(
        ([1.0 / np.sqrt(p_degrees)], [0]),
        shape=(top_count, top_count),
        dtype=float,
    )
    u_degrees = W.sum(axis=1, dtype=float)
    u_degrees[u_degrees == 0] = 1.0
    D_u = sp.sparse.dia_array(
        ([1.0 / np.sqrt(u_degrees)], [0]),
        shape=(bottom_count, bottom_count),
        dtype=float,
    )
    S = D_u.tocsr() @ W @ D_p.tocsr()
    S_T = S.T

    # Initialize birank vectors for iteration
    p = np.ones(top_count, dtype=float) / top_count
    u = beta * (S @ p) + (1 - beta) * u0

    # Iterate until convergence
    for _ in range(max_iter):
        p_last = p
        u_last = u
        p = alpha * (S_T @ u) + (1 - alpha) * p0
        u = beta * (S @ p) + (1 - beta) * u0

        # Continue iterating if the error (absolute if less than 1, relative otherwise)
        # is above the tolerance threshold for either p or u
        err_u = np.absolute((u_last - u) / np.maximum(1.0, u_last)).sum()
        if err_u >= len(u) * tol:
            continue
        err_p = np.absolute((p_last - p) / np.maximum(1.0, p_last)).sum()
        if err_p >= len(p) * tol:
            continue

        # Handle edge case where both alpha and beta are 1: the scale is
        # indeterminate, so normalization is required to return consistent results
        if alpha == 1 and beta == 1:
            p = p / np.linalg.norm(p, 1)
            u = u / np.linalg.norm(u, 1)

        # If both error thresholds pass, return a single dictionary mapping
        # nodes to their scores
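        # Iterating `top` and `bottom` here reuses the same set iteration order
        # used above to build p0, u0, and the biadjacency matrix, so each score
        # is paired with the correct node.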
        return dict(
            zip(itertools.chain(top, bottom), map(float, itertools.chain(p, u)))
        )

    # If we reach this point, we have not converged
    raise nx.PowerIterationFailedConvergence(max_iter)


def _clean_personalization_dict(personalization):
    """Filter out zero values from the personalization dictionary,
    handle case where None is passed, ensure values are non-negative."""
    if personalization is None:
        return {}
    if any(value < 0 for value in personalization.values()):
        raise nx.NetworkXAlgorithmError("Personalization values must be non-negative.")
    return {node: value for node, value in personalization.items() if value != 0}